X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fcommon%2Fprimitives.cpp;h=ebb8af61036e4ddafae29dc671380ba79330706e;hp=7592d27da7c37eadc637beec06b1f6a5d16b3ee2;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/common/primitives.cpp b/source/common/primitives.cpp index 7592d27..ebb8af6 100644 --- a/source/common/primitives.cpp +++ b/source/common/primitives.cpp @@ -75,7 +75,8 @@ void Setup_Alias_Primitives(EncoderPrimitives &p) p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i]; p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i]; p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i]; - p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i]; + p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i]; + p.chroma[X265_CSP_I444].satd[i] = p.satd[i]; } for (int i = 0; i < NUM_SQUARE_BLOCKS; i++) @@ -84,15 +85,6 @@ void Setup_Alias_Primitives(EncoderPrimitives &p) p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i]; } - for (int i = 0; i < NUM_SQUARE_BLOCKS; i++) - { - int partL = partitionFromLog2Size(i + 2); - p.square_copy_pp[i] = p.luma_copy_pp[partL]; - p.square_copy_ps[i] = p.luma_copy_ps[partL]; - p.square_copy_sp[i] = p.luma_copy_sp[partL]; - p.square_copy_ss[i] = p.luma_copy_ss[partL]; - } - primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4]; primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8]; primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16]; @@ -107,6 +99,52 @@ void Setup_Alias_Primitives(EncoderPrimitives &p) primitives.sa8d_inter[LUMA_16x4] = primitives.satd[LUMA_16x4]; primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12]; primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16]; + + // Chroma SATD can often reuse luma primitives + p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = primitives.satd[LUMA_4x4]; + p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = primitives.satd[LUMA_8x8]; + p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16]; + p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32]; + + p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = primitives.satd[LUMA_8x4]; + p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = primitives.satd[LUMA_4x8]; + p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = primitives.satd[LUMA_16x8]; + p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = primitives.satd[LUMA_8x16]; + p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16]; + p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32]; + + p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12]; + p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16]; + p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = primitives.satd[LUMA_16x4]; + p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = primitives.satd[LUMA_4x16]; + p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24]; + p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32]; + p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = primitives.satd[LUMA_32x8]; + p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = primitives.satd[LUMA_8x32]; + + p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = primitives.satd[LUMA_4x8]; + p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = primitives.satd[LUMA_8x16]; + p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32]; + p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64]; + + p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = primitives.satd[LUMA_4x4]; + p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = primitives.satd[LUMA_8x8]; + p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = primitives.satd[LUMA_4x16]; + p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16]; + p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = primitives.satd[LUMA_8x32]; + p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32]; + p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64]; + + //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>; + p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = primitives.satd[LUMA_8x4]; + //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>; + //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>; + p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = primitives.satd[LUMA_16x8]; + //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>; + //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>; + //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>; + p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16]; + //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>; } } using namespace x265; @@ -123,17 +161,15 @@ void x265_setup_primitives(x265_param *param, int cpuid) if (!primitives.sad[0]) { Setup_C_Primitives(primitives); - Setup_Instrinsic_Primitives(primitives, cpuid); #if ENABLE_ASSEMBLY + Setup_Instrinsic_Primitives(primitives, cpuid); Setup_Assembly_Primitives(primitives, cpuid); #else x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n"); #endif Setup_Alias_Primitives(primitives); - - initROM(); } if (param->logLevel >= X265_LOG_INFO) @@ -169,74 +205,14 @@ void x265_setup_primitives(x265_param *param, int cpuid) } } -#if !defined(ENABLE_ASSEMBLY) -#if defined(_MSC_VER) -#include -#endif - +#if ENABLE_ASSEMBLY +/* these functions are implemented in assembly. When assembly is not being + * compiled, they are unnecessary and can be NOPs */ +#else extern "C" { -// the intrinsic primitives will not use MMX instructions, so if assembly -// is disabled there should be no reason to use EMMS. +int x265_cpu_cpuid_test(void) { return 0; } void x265_cpu_emms(void) {} - -#if defined(X265_ARCH_X86) - -#if defined(_MSC_VER) -# pragma warning(disable: 4100) -#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax -# define __cpuidex(regsArray, level, index) \ - __asm__ __volatile__ ("cpuid" \ - : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3]) \ - : "0" (level), "2" (index)); -#else -# error "compiler not supported" -#endif - -int x265_cpu_cpuid_test(void) -{ - return 0; +void x265_cpu_cpuid(uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *) {} +void x265_cpu_xgetbv(uint32_t, uint32_t *, uint32_t *) {} } - -void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) -{ - int output[4]; - - __cpuidex(output, op, 0); - *eax = output[0]; - *ebx = output[1]; - *ecx = output[2]; - *edx = output[3]; -} - -void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx) -{ - uint64_t out = 0; - -#if X265_ARCH_X86 - -#if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) - - // MSVC 2010 SP1 or later, or similar Intel release - out = _xgetbv(op); - -#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax - - uint32_t a, d; - __asm("xgetbv" : "=a" (a), "=d" (d) : "c" (op) :); - *eax = a; - *edx = d; - return; - -#elif defined(_WIN64) // On x64 with older compilers, this is impossible - -#endif // if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) - -#endif // if x86 - - *eax = (uint32_t)out; - *edx = (uint32_t)(out >> 32); -} - -#endif // X265_ARCH_X86 -} -#endif // if !ENABLE_ASSEMBLY +#endif