Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / primitives.cpp
index 7592d27da7c37eadc637beec06b1f6a5d16b3ee2..ebb8af61036e4ddafae29dc671380ba79330706e 100644 (file)
@@ -75,7 +75,8 @@ void Setup_Alias_Primitives(EncoderPrimitives &p)
         p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];
         p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];
         p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];
-        p.chroma[X265_CSP_I444].addAvg[i]  = p.luma_addAvg[i];
+        p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];
+        p.chroma[X265_CSP_I444].satd[i] = p.satd[i];
     }
 
     for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
@@ -84,15 +85,6 @@ void Setup_Alias_Primitives(EncoderPrimitives &p)
         p.chroma[X265_CSP_I444].sub_ps[i]  = p.luma_sub_ps[i];
     }
 
-    for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
-    {
-        int partL = partitionFromLog2Size(i + 2);
-        p.square_copy_pp[i] = p.luma_copy_pp[partL];
-        p.square_copy_ps[i] = p.luma_copy_ps[partL];
-        p.square_copy_sp[i] = p.luma_copy_sp[partL];
-        p.square_copy_ss[i] = p.luma_copy_ss[partL];
-    }
-
     primitives.sa8d[BLOCK_4x4]   = primitives.sa8d_inter[LUMA_4x4];
     primitives.sa8d[BLOCK_8x8]   = primitives.sa8d_inter[LUMA_8x8];
     primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
@@ -107,6 +99,52 @@ void Setup_Alias_Primitives(EncoderPrimitives &p)
     primitives.sa8d_inter[LUMA_16x4]  = primitives.satd[LUMA_16x4];
     primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12];
     primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16];
+
+    // Chroma SATD can often reuse luma primitives
+    p.chroma[X265_CSP_I420].satd[CHROMA_4x4]   = primitives.satd[LUMA_4x4];
+    p.chroma[X265_CSP_I420].satd[CHROMA_8x8]   = primitives.satd[LUMA_8x8];
+    p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16];
+    p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32];
+
+    p.chroma[X265_CSP_I420].satd[CHROMA_8x4]   = primitives.satd[LUMA_8x4];
+    p.chroma[X265_CSP_I420].satd[CHROMA_4x8]   = primitives.satd[LUMA_4x8];
+    p.chroma[X265_CSP_I420].satd[CHROMA_16x8]  = primitives.satd[LUMA_16x8];
+    p.chroma[X265_CSP_I420].satd[CHROMA_8x16]  = primitives.satd[LUMA_8x16];
+    p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16];
+    p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32];
+
+    p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12];
+    p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16];
+    p.chroma[X265_CSP_I420].satd[CHROMA_16x4]  = primitives.satd[LUMA_16x4];
+    p.chroma[X265_CSP_I420].satd[CHROMA_4x16]  = primitives.satd[LUMA_4x16];
+    p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24];
+    p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32];
+    p.chroma[X265_CSP_I420].satd[CHROMA_32x8]  = primitives.satd[LUMA_32x8];
+    p.chroma[X265_CSP_I420].satd[CHROMA_8x32]  = primitives.satd[LUMA_8x32];
+
+    p.chroma[X265_CSP_I422].satd[CHROMA422_4x8]   = primitives.satd[LUMA_4x8];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_8x16]  = primitives.satd[LUMA_8x16];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64];
+
+    p.chroma[X265_CSP_I422].satd[CHROMA422_4x4]   = primitives.satd[LUMA_4x4];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_8x8]   = primitives.satd[LUMA_8x8];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_4x16]  = primitives.satd[LUMA_4x16];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_8x32]  = primitives.satd[LUMA_8x32];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32];
+    p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64];
+
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12]  = satd4<8, 12>;
+    p.chroma[X265_CSP_I422].satd[CHROMA422_8x4]   = primitives.satd[LUMA_8x4];
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
+    p.chroma[X265_CSP_I422].satd[CHROMA422_16x8]  = primitives.satd[LUMA_16x8];
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32]  = satd4<4, 32>;
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
+    p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
+    //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64]  = satd8<8, 64>;
 }
 }
 using namespace x265;
@@ -123,17 +161,15 @@ void x265_setup_primitives(x265_param *param, int cpuid)
     if (!primitives.sad[0])
     {
         Setup_C_Primitives(primitives);
-        Setup_Instrinsic_Primitives(primitives, cpuid);
 
 #if ENABLE_ASSEMBLY
+        Setup_Instrinsic_Primitives(primitives, cpuid);
         Setup_Assembly_Primitives(primitives, cpuid);
 #else
         x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n");
 #endif
 
         Setup_Alias_Primitives(primitives);
-
-        initROM();
     }
 
     if (param->logLevel >= X265_LOG_INFO)
@@ -169,74 +205,14 @@ void x265_setup_primitives(x265_param *param, int cpuid)
     }
 }
 
-#if !defined(ENABLE_ASSEMBLY)
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-
+#if ENABLE_ASSEMBLY
+/* these functions are implemented in assembly. When assembly is not being
+ * compiled, they are unnecessary and can be NOPs */
+#else
 extern "C" {
-// the intrinsic primitives will not use MMX instructions, so if assembly
-// is disabled there should be no reason to use EMMS.
+int x265_cpu_cpuid_test(void) { return 0; } /* non-assembly stub: always returns 0 */
 void x265_cpu_emms(void) {}
-
-#if defined(X265_ARCH_X86)
-
-#if defined(_MSC_VER)
-# pragma warning(disable: 4100)
-#elif defined(__GNUC__) || defined(__clang__)    // use inline assembly, Gnu/AT&T syntax
-# define __cpuidex(regsArray, level, index) \
-    __asm__ __volatile__ ("cpuid" \
-                          : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3]) \
-                          : "0" (level), "2" (index));
-#else
-# error "compiler not supported"
-#endif
-
-int x265_cpu_cpuid_test(void)
-{
-    return 0;
+void x265_cpu_cpuid(uint32_t, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { *eax = *ebx = *ecx = *edx = 0; } /* non-assembly stub: zero every output so callers never read indeterminate values */
+void x265_cpu_xgetbv(uint32_t, uint32_t *eax, uint32_t *edx) { *eax = 0; *edx = 0; } /* non-assembly stub: zero both outputs so callers never read indeterminate values */
 }
-
-void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
-    int output[4];
-
-    __cpuidex(output, op, 0);
-    *eax = output[0];
-    *ebx = output[1];
-    *ecx = output[2];
-    *edx = output[3];
-}
-
-void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx)
-{
-    uint64_t out = 0;
-
-#if X265_ARCH_X86
-
-#if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
-
-    // MSVC 2010 SP1 or later, or similar Intel release
-    out = _xgetbv(op);
-
-#elif defined(__GNUC__) || defined(__clang__)    // use inline assembly, Gnu/AT&T syntax
-
-    uint32_t a, d;
-    __asm("xgetbv" : "=a" (a), "=d" (d) : "c" (op) :);
-    *eax = a;
-    *edx = d;
-    return;
-
-#elif defined(_WIN64)      // On x64 with older compilers, this is impossible
-
-#endif // if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
-
-#endif // if x86
-
-    *eax = (uint32_t)out;
-    *edx = (uint32_t)(out >> 32);
-}
-
-#endif // X265_ARCH_X86
-}
-#endif // if !ENABLE_ASSEMBLY
+#endif