p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];
p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];
p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];
- p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];
+ p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];
+ p.chroma[X265_CSP_I444].satd[i] = p.satd[i];
}
for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
}
- for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
- {
- int partL = partitionFromLog2Size(i + 2);
- p.square_copy_pp[i] = p.luma_copy_pp[partL];
- p.square_copy_ps[i] = p.luma_copy_ps[partL];
- p.square_copy_sp[i] = p.luma_copy_sp[partL];
- p.square_copy_ss[i] = p.luma_copy_ss[partL];
- }
-
primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4];
primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8];
primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
primitives.sa8d_inter[LUMA_16x4] = primitives.satd[LUMA_16x4];
primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12];
primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16];
+
+ // Chroma SATD can often reuse luma primitives
+ p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = primitives.satd[LUMA_4x4];
+ p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = primitives.satd[LUMA_8x8];
+ p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16];
+ p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32];
+
+ p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = primitives.satd[LUMA_8x4];
+ p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = primitives.satd[LUMA_4x8];
+ p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = primitives.satd[LUMA_16x8];
+ p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = primitives.satd[LUMA_8x16];
+ p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16];
+ p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32];
+
+ p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12];
+ p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16];
+ p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = primitives.satd[LUMA_16x4];
+ p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = primitives.satd[LUMA_4x16];
+ p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24];
+ p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32];
+ p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = primitives.satd[LUMA_32x8];
+ p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = primitives.satd[LUMA_8x32];
+
+ p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = primitives.satd[LUMA_4x8];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = primitives.satd[LUMA_8x16];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64];
+
+ p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = primitives.satd[LUMA_4x4];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = primitives.satd[LUMA_8x8];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = primitives.satd[LUMA_4x16];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = primitives.satd[LUMA_8x32];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32];
+ p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64];
+
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
+ p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = primitives.satd[LUMA_8x4];
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
+ p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = primitives.satd[LUMA_16x8];
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
+ p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
+ //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
}
}
using namespace x265;
if (!primitives.sad[0])
{
Setup_C_Primitives(primitives);
- Setup_Instrinsic_Primitives(primitives, cpuid);
#if ENABLE_ASSEMBLY
+ Setup_Instrinsic_Primitives(primitives, cpuid);
Setup_Assembly_Primitives(primitives, cpuid);
#else
x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n");
#endif
Setup_Alias_Primitives(primitives);
-
- initROM();
}
if (param->logLevel >= X265_LOG_INFO)
}
}
-#if !defined(ENABLE_ASSEMBLY)
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-
+#if ENABLE_ASSEMBLY
+/* These functions are implemented in assembly. When assembly is not being
+ * compiled, they are not needed, so trivial stubs are provided instead. */
+#else
extern "C" {
-// the intrinsic primitives will not use MMX instructions, so if assembly
-// is disabled there should be no reason to use EMMS.
+int x265_cpu_cpuid_test(void) { return 0; }
void x265_cpu_emms(void) {}
-
-#if defined(X265_ARCH_X86)
-
-#if defined(_MSC_VER)
-# pragma warning(disable: 4100)
-#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax
-# define __cpuidex(regsArray, level, index) \
- __asm__ __volatile__ ("cpuid" \
- : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3]) \
- : "0" (level), "2" (index));
-#else
-# error "compiler not supported"
-#endif
-
-int x265_cpu_cpuid_test(void)
-{
- return 0;
+void x265_cpu_cpuid(uint32_t, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { *eax = *ebx = *ecx = *edx = 0; } /* stub: zero every output so callers never read an uninitialised register value */
+void x265_cpu_xgetbv(uint32_t, uint32_t *, uint32_t *) {}
}
-
-void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
- int output[4];
-
- __cpuidex(output, op, 0);
- *eax = output[0];
- *ebx = output[1];
- *ecx = output[2];
- *edx = output[3];
-}
-
-void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx)
-{
- uint64_t out = 0;
-
-#if X265_ARCH_X86
-
-#if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
-
- // MSVC 2010 SP1 or later, or similar Intel release
- out = _xgetbv(op);
-
-#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax
-
- uint32_t a, d;
- __asm("xgetbv" : "=a" (a), "=d" (d) : "c" (op) :);
- *eax = a;
- *edx = d;
- return;
-
-#elif defined(_WIN64) // On x64 with older compilers, this is impossible
-
-#endif // if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
-
-#endif // if x86
-
- *eax = (uint32_t)out;
- *edx = (uint32_t)(out >> 32);
-}
-
-#endif // X265_ARCH_X86
-}
-#endif // if !ENABLE_ASSEMBLY
+#endif