Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "primitives.h" | |
26 | ||
27 | namespace x265 { | |
28 | // x265 private namespace | |
29 | ||
30 | extern const uint8_t lumaPartitionMapTable[] = | |
31 | { | |
32 | // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 | |
33 | LUMA_4x4, LUMA_4x8, 255, LUMA_4x16, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 4 | |
34 | LUMA_8x4, LUMA_8x8, 255, LUMA_8x16, 255, 255, 255, LUMA_8x32, 255, 255, 255, 255, 255, 255, 255, 255, // 8 | |
35 | 255, 255, 255, LUMA_12x16, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 12 | |
36 | LUMA_16x4, LUMA_16x8, LUMA_16x12, LUMA_16x16, 255, 255, 255, LUMA_16x32, 255, 255, 255, 255, 255, 255, 255, LUMA_16x64, // 16 | |
37 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 20 | |
38 | 255, 255, 255, 255, 255, 255, 255, LUMA_24x32, 255, 255, 255, 255, 255, 255, 255, 255, // 24 | |
39 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 28 | |
40 | 255, LUMA_32x8, 255, LUMA_32x16, 255, LUMA_32x24, 255, LUMA_32x32, 255, 255, 255, 255, 255, 255, 255, LUMA_32x64, // 32 | |
41 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 36 | |
42 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 40 | |
43 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 44 | |
44 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, LUMA_48x64, // 48 | |
45 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 52 | |
46 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 56 | |
47 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 60 | |
48 | 255, 255, 255, LUMA_64x16, 255, 255, 255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64 // 64 | |
49 | }; | |
50 | ||
51 | /* the "authoritative" set of encoder primitives */ | |
52 | EncoderPrimitives primitives; | |
53 | ||
54 | void Setup_C_PixelPrimitives(EncoderPrimitives &p); | |
55 | void Setup_C_DCTPrimitives(EncoderPrimitives &p); | |
56 | void Setup_C_IPFilterPrimitives(EncoderPrimitives &p); | |
57 | void Setup_C_IPredPrimitives(EncoderPrimitives &p); | |
58 | void Setup_C_LoopFilterPrimitives(EncoderPrimitives &p); | |
59 | ||
60 | void Setup_C_Primitives(EncoderPrimitives &p) | |
61 | { | |
62 | Setup_C_PixelPrimitives(p); // pixel.cpp | |
63 | Setup_C_DCTPrimitives(p); // dct.cpp | |
64 | Setup_C_IPFilterPrimitives(p); // ipfilter.cpp | |
65 | Setup_C_IPredPrimitives(p); // intrapred.cpp | |
66 | Setup_C_LoopFilterPrimitives(p); // loopfilter.cpp | |
67 | } | |
68 | ||
69 | void Setup_Alias_Primitives(EncoderPrimitives &p) | |
70 | { | |
71 | /* copy reusable luma primitives to chroma 4:4:4 */ | |
72 | for (int i = 0; i < NUM_LUMA_PARTITIONS; i++) | |
73 | { | |
74 | p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i]; | |
75 | p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i]; | |
76 | p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i]; | |
77 | p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i]; | |
b53f7c52 JB |
78 | p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i]; |
79 | p.chroma[X265_CSP_I444].satd[i] = p.satd[i]; | |
72b9787e JB |
80 | } |
81 | ||
82 | for (int i = 0; i < NUM_SQUARE_BLOCKS; i++) | |
83 | { | |
84 | p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i]; | |
85 | p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i]; | |
86 | } | |
87 | ||
72b9787e JB |
88 | primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4]; |
89 | primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8]; | |
90 | primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16]; | |
91 | primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32]; | |
92 | primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64]; | |
93 | ||
94 | // SA8D devolves to SATD for blocks not even multiples of 8x8 | |
95 | primitives.sa8d_inter[LUMA_4x4] = primitives.satd[LUMA_4x4]; | |
96 | primitives.sa8d_inter[LUMA_4x8] = primitives.satd[LUMA_4x8]; | |
97 | primitives.sa8d_inter[LUMA_4x16] = primitives.satd[LUMA_4x16]; | |
98 | primitives.sa8d_inter[LUMA_8x4] = primitives.satd[LUMA_8x4]; | |
99 | primitives.sa8d_inter[LUMA_16x4] = primitives.satd[LUMA_16x4]; | |
100 | primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12]; | |
101 | primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16]; | |
b53f7c52 JB |
102 | |
103 | // Chroma SATD can often reuse luma primitives | |
104 | p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = primitives.satd[LUMA_4x4]; | |
105 | p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = primitives.satd[LUMA_8x8]; | |
106 | p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16]; | |
107 | p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32]; | |
108 | ||
109 | p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = primitives.satd[LUMA_8x4]; | |
110 | p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = primitives.satd[LUMA_4x8]; | |
111 | p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = primitives.satd[LUMA_16x8]; | |
112 | p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = primitives.satd[LUMA_8x16]; | |
113 | p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16]; | |
114 | p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32]; | |
115 | ||
116 | p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12]; | |
117 | p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16]; | |
118 | p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = primitives.satd[LUMA_16x4]; | |
119 | p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = primitives.satd[LUMA_4x16]; | |
120 | p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24]; | |
121 | p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32]; | |
122 | p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = primitives.satd[LUMA_32x8]; | |
123 | p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = primitives.satd[LUMA_8x32]; | |
124 | ||
125 | p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = primitives.satd[LUMA_4x8]; | |
126 | p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = primitives.satd[LUMA_8x16]; | |
127 | p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32]; | |
128 | p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64]; | |
129 | ||
130 | p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = primitives.satd[LUMA_4x4]; | |
131 | p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = primitives.satd[LUMA_8x8]; | |
132 | p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = primitives.satd[LUMA_4x16]; | |
133 | p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16]; | |
134 | p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = primitives.satd[LUMA_8x32]; | |
135 | p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32]; | |
136 | p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64]; | |
137 | ||
138 | //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>; | |
139 | p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = primitives.satd[LUMA_8x4]; | |
140 | //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>; | |
141 | //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>; | |
142 | p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = primitives.satd[LUMA_16x8]; | |
143 | //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>; | |
144 | //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>; | |
145 | //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>; | |
146 | p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16]; | |
147 | //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>; | |
72b9787e JB |
148 | } |
149 | } | |
150 | using namespace x265; | |
151 | ||
152 | /* cpuid >= 0 - force CPU type | |
153 | * cpuid < 0 - auto-detect if uninitialized */ | |
154 | extern "C" | |
155 | void x265_setup_primitives(x265_param *param, int cpuid) | |
156 | { | |
157 | if (cpuid < 0) | |
158 | cpuid = x265::cpu_detect(); | |
159 | ||
160 | // initialize global variables | |
161 | if (!primitives.sad[0]) | |
162 | { | |
163 | Setup_C_Primitives(primitives); | |
72b9787e JB |
164 | |
165 | #if ENABLE_ASSEMBLY | |
b53f7c52 | 166 | Setup_Instrinsic_Primitives(primitives, cpuid); |
72b9787e JB |
167 | Setup_Assembly_Primitives(primitives, cpuid); |
168 | #else | |
169 | x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n"); | |
170 | #endif | |
171 | ||
172 | Setup_Alias_Primitives(primitives); | |
72b9787e JB |
173 | } |
174 | ||
175 | if (param->logLevel >= X265_LOG_INFO) | |
176 | { | |
177 | char buf[1000]; | |
178 | char *p = buf + sprintf(buf, "using cpu capabilities:"); | |
179 | char *none = p; | |
180 | for (int i = 0; x265::cpu_names[i].flags; i++) | |
181 | { | |
182 | if (!strcmp(x265::cpu_names[i].name, "SSE") | |
183 | && (cpuid & X265_CPU_SSE2)) | |
184 | continue; | |
185 | if (!strcmp(x265::cpu_names[i].name, "SSE2") | |
186 | && (cpuid & (X265_CPU_SSE2_IS_FAST | X265_CPU_SSE2_IS_SLOW))) | |
187 | continue; | |
188 | if (!strcmp(x265::cpu_names[i].name, "SSE3") | |
189 | && (cpuid & X265_CPU_SSSE3 || !(cpuid & X265_CPU_CACHELINE_64))) | |
190 | continue; | |
191 | if (!strcmp(x265::cpu_names[i].name, "SSE4.1") | |
192 | && (cpuid & X265_CPU_SSE42)) | |
193 | continue; | |
194 | if (!strcmp(x265::cpu_names[i].name, "BMI1") | |
195 | && (cpuid & X265_CPU_BMI2)) | |
196 | continue; | |
197 | if ((cpuid & x265::cpu_names[i].flags) == x265::cpu_names[i].flags | |
198 | && (!i || x265::cpu_names[i].flags != x265::cpu_names[i - 1].flags)) | |
199 | p += sprintf(p, " %s", x265::cpu_names[i].name); | |
200 | } | |
201 | ||
202 | if (p == none) | |
203 | sprintf(p, " none!"); | |
204 | x265_log(param, X265_LOG_INFO, "%s\n", buf); | |
205 | } | |
206 | } | |
207 | ||
b53f7c52 JB |
#if !ENABLE_ASSEMBLY
/* These functions are normally implemented in assembly.  When assembly is not
 * being compiled, the stand-ins below take their place: they perform no
 * hardware query and report all-zero results, so a caller never reads an
 * output that was left unwritten. */

/* Stores zero through reg; a null pointer is tolerated and ignored. */
static void clearCpuReg(uint32_t *reg)
{
    if (reg)
        *reg = 0;
}

extern "C" {
/* Always returns 0. */
int x265_cpu_cpuid_test(void) { return 0; }

/* No-op stand-in. */
void x265_cpu_emms(void) {}

/* Stand-in for the CPUID query: the first argument is ignored and all four
 * output words are set to zero. */
void x265_cpu_cpuid(uint32_t, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
    clearCpuReg(eax);
    clearCpuReg(ebx);
    clearCpuReg(ecx);
    clearCpuReg(edx);
}

/* Stand-in for the XGETBV query: the first argument is ignored and both
 * output words are set to zero. */
void x265_cpu_xgetbv(uint32_t, uint32_t *eax, uint32_t *edx)
{
    clearCpuReg(eax);
    clearCpuReg(edx);
}
}
#endif