1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
25 #include "primitives.h"
28 // x265 private namespace
30 extern const uint8_t lumaPartitionMapTable
[] =
32 // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64
33 LUMA_4x4
, LUMA_4x8
, 255, LUMA_4x16
, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 4
34 LUMA_8x4
, LUMA_8x8
, 255, LUMA_8x16
, 255, 255, 255, LUMA_8x32
, 255, 255, 255, 255, 255, 255, 255, 255, // 8
35 255, 255, 255, LUMA_12x16
, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 12
36 LUMA_16x4
, LUMA_16x8
, LUMA_16x12
, LUMA_16x16
, 255, 255, 255, LUMA_16x32
, 255, 255, 255, 255, 255, 255, 255, LUMA_16x64
, // 16
37 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 20
38 255, 255, 255, 255, 255, 255, 255, LUMA_24x32
, 255, 255, 255, 255, 255, 255, 255, 255, // 24
39 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 28
40 255, LUMA_32x8
, 255, LUMA_32x16
, 255, LUMA_32x24
, 255, LUMA_32x32
, 255, 255, 255, 255, 255, 255, 255, LUMA_32x64
, // 32
41 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 36
42 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 40
43 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 44
44 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, LUMA_48x64
, // 48
45 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 52
46 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 56
47 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 60
48 255, 255, 255, LUMA_64x16
, 255, 255, 255, LUMA_64x32
, 255, 255, 255, LUMA_64x48
, 255, 255, 255, LUMA_64x64
// 64
/* the "authoritative" set of encoder primitives */
// Global function-pointer table shared by the whole encoder.  It is
// populated on first use by x265_setup_primitives() (C reference
// implementations first, then intrinsic/assembly overrides, then aliases).
EncoderPrimitives primitives;
// Forward declarations of the per-module C-reference installers.  Each
// fills its slice of the EncoderPrimitives table with portable C
// implementations; the defining .cpp file is noted at the call site in
// Setup_C_Primitives().
void Setup_C_PixelPrimitives(EncoderPrimitives &p);
void Setup_C_DCTPrimitives(EncoderPrimitives &p);
void Setup_C_IPFilterPrimitives(EncoderPrimitives &p);
void Setup_C_IPredPrimitives(EncoderPrimitives &p);
void Setup_C_LoopFilterPrimitives(EncoderPrimitives &p);
60 void Setup_C_Primitives(EncoderPrimitives
&p
)
62 Setup_C_PixelPrimitives(p
); // pixel.cpp
63 Setup_C_DCTPrimitives(p
); // dct.cpp
64 Setup_C_IPFilterPrimitives(p
); // ipfilter.cpp
65 Setup_C_IPredPrimitives(p
); // intrapred.cpp
66 Setup_C_LoopFilterPrimitives(p
); // loopfilter.cpp
69 void Setup_Alias_Primitives(EncoderPrimitives
&p
)
71 /* copy reusable luma primitives to chroma 4:4:4 */
72 for (int i
= 0; i
< NUM_LUMA_PARTITIONS
; i
++)
74 p
.chroma
[X265_CSP_I444
].copy_pp
[i
] = p
.luma_copy_pp
[i
];
75 p
.chroma
[X265_CSP_I444
].copy_ps
[i
] = p
.luma_copy_ps
[i
];
76 p
.chroma
[X265_CSP_I444
].copy_sp
[i
] = p
.luma_copy_sp
[i
];
77 p
.chroma
[X265_CSP_I444
].copy_ss
[i
] = p
.luma_copy_ss
[i
];
78 p
.chroma
[X265_CSP_I444
].addAvg
[i
] = p
.luma_addAvg
[i
];
81 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
83 p
.chroma
[X265_CSP_I444
].add_ps
[i
] = p
.luma_add_ps
[i
];
84 p
.chroma
[X265_CSP_I444
].sub_ps
[i
] = p
.luma_sub_ps
[i
];
87 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
89 int partL
= partitionFromLog2Size(i
+ 2);
90 p
.square_copy_pp
[i
] = p
.luma_copy_pp
[partL
];
91 p
.square_copy_ps
[i
] = p
.luma_copy_ps
[partL
];
92 p
.square_copy_sp
[i
] = p
.luma_copy_sp
[partL
];
93 p
.square_copy_ss
[i
] = p
.luma_copy_ss
[partL
];
96 primitives
.sa8d
[BLOCK_4x4
] = primitives
.sa8d_inter
[LUMA_4x4
];
97 primitives
.sa8d
[BLOCK_8x8
] = primitives
.sa8d_inter
[LUMA_8x8
];
98 primitives
.sa8d
[BLOCK_16x16
] = primitives
.sa8d_inter
[LUMA_16x16
];
99 primitives
.sa8d
[BLOCK_32x32
] = primitives
.sa8d_inter
[LUMA_32x32
];
100 primitives
.sa8d
[BLOCK_64x64
] = primitives
.sa8d_inter
[LUMA_64x64
];
102 // SA8D devolves to SATD for blocks not even multiples of 8x8
103 primitives
.sa8d_inter
[LUMA_4x4
] = primitives
.satd
[LUMA_4x4
];
104 primitives
.sa8d_inter
[LUMA_4x8
] = primitives
.satd
[LUMA_4x8
];
105 primitives
.sa8d_inter
[LUMA_4x16
] = primitives
.satd
[LUMA_4x16
];
106 primitives
.sa8d_inter
[LUMA_8x4
] = primitives
.satd
[LUMA_8x4
];
107 primitives
.sa8d_inter
[LUMA_16x4
] = primitives
.satd
[LUMA_16x4
];
108 primitives
.sa8d_inter
[LUMA_16x12
] = primitives
.satd
[LUMA_16x12
];
109 primitives
.sa8d_inter
[LUMA_12x16
] = primitives
.satd
[LUMA_12x16
];
112 using namespace x265
;
114 /* cpuid >= 0 - force CPU type
115 * cpuid < 0 - auto-detect if uninitialized */
117 void x265_setup_primitives(x265_param
*param
, int cpuid
)
120 cpuid
= x265::cpu_detect();
122 // initialize global variables
123 if (!primitives
.sad
[0])
125 Setup_C_Primitives(primitives
);
126 Setup_Instrinsic_Primitives(primitives
, cpuid
);
129 Setup_Assembly_Primitives(primitives
, cpuid
);
131 x265_log(param
, X265_LOG_WARNING
, "Assembly not supported in this binary\n");
134 Setup_Alias_Primitives(primitives
);
139 if (param
->logLevel
>= X265_LOG_INFO
)
142 char *p
= buf
+ sprintf(buf
, "using cpu capabilities:");
144 for (int i
= 0; x265::cpu_names
[i
].flags
; i
++)
146 if (!strcmp(x265::cpu_names
[i
].name
, "SSE")
147 && (cpuid
& X265_CPU_SSE2
))
149 if (!strcmp(x265::cpu_names
[i
].name
, "SSE2")
150 && (cpuid
& (X265_CPU_SSE2_IS_FAST
| X265_CPU_SSE2_IS_SLOW
)))
152 if (!strcmp(x265::cpu_names
[i
].name
, "SSE3")
153 && (cpuid
& X265_CPU_SSSE3
|| !(cpuid
& X265_CPU_CACHELINE_64
)))
155 if (!strcmp(x265::cpu_names
[i
].name
, "SSE4.1")
156 && (cpuid
& X265_CPU_SSE42
))
158 if (!strcmp(x265::cpu_names
[i
].name
, "BMI1")
159 && (cpuid
& X265_CPU_BMI2
))
161 if ((cpuid
& x265::cpu_names
[i
].flags
) == x265::cpu_names
[i
].flags
162 && (!i
|| x265::cpu_names
[i
].flags
!= x265::cpu_names
[i
- 1].flags
))
163 p
+= sprintf(p
, " %s", x265::cpu_names
[i
].name
);
167 sprintf(p
, " none!");
168 x265_log(param
, X265_LOG_INFO
, "%s\n", buf
);
// ---------------------------------------------------------------------
// Fallback CPU helpers used only when assembly is disabled at build time.
// NOTE(review): several function bodies and preprocessor branches in this
// region appear truncated by extraction (missing braces, missing local
// declarations such as `output`, `a`, `d`, `out`, and at least one lost
// #else/#endif).  Code below is kept byte-identical; comments describe
// apparent intent only — confirm against upstream before relying on them.
// ---------------------------------------------------------------------
#if !defined(ENABLE_ASSEMBLY)
#if defined(_MSC_VER)
// the intrinsic primitives will not use MMX instructions, so if assembly
// is disabled there should be no reason to use EMMS.
void x265_cpu_emms(void) {}

#if defined(X265_ARCH_X86)

#if defined(_MSC_VER)
# pragma warning(disable: 4100)
#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax
// Map MSVC's __cpuidex intrinsic onto GCC/Clang extended inline asm:
// "0"/"2" tie the level and index inputs to eax/ecx respectively.
# define __cpuidex(regsArray, level, index) \
    __asm__ __volatile__ ("cpuid" \
                          : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3]) \
                          : "0" (level), "2" (index));
// NOTE(review): an #else seems to have been lost before this directive;
// the #error presumably belongs to the unsupported-compiler branch.
# error "compiler not supported"

// Probe whether the CPUID instruction is available (body truncated here).
int x265_cpu_cpuid_test(void)

// Execute CPUID leaf `op` (sub-leaf 0) and return the four result
// registers through the output pointers (body partially truncated).
void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
    __cpuidex(output, op, 0);

// Read extended control register `op` via XGETBV — presumably used to
// check OS support for saving AVX state; verify against callers.
void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx)
#if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // MSVC 2010 SP1 or later, or similar Intel release
#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax
    __asm("xgetbv" : "=a" (a), "=d" (d) : "c" (op) :);
#elif defined(_WIN64) // On x64 with older compilers, this is impossible
#endif // if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // split the 64-bit XGETBV result into the low/high 32-bit outputs
    *eax = (uint32_t)out;
    *edx = (uint32_t)(out >> 32);

#endif // X265_ARCH_X86

#endif // if !ENABLE_ASSEMBLY