/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "common.h"
#include "primitives.h"

namespace x265 {
// x265 private namespace

extern const uint8_t lumaPartitionMapTable[] =
{
//  4          8          12          16          20   24          28   32          36   40   44   48          52   56   60   64
    LUMA_4x4,  LUMA_4x8,  255,        LUMA_4x16,  255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 4
    LUMA_8x4,  LUMA_8x8,  255,        LUMA_8x16,  255, 255,        255, LUMA_8x32,  255, 255, 255, 255,        255, 255, 255, 255,        // 8
    255,       255,       255,        LUMA_12x16, 255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 12
    LUMA_16x4, LUMA_16x8, LUMA_16x12, LUMA_16x16, 255, 255,        255, LUMA_16x32, 255, 255, 255, 255,        255, 255, 255, LUMA_16x64, // 16
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 20
    255,       255,       255,        255,        255, 255,        255, LUMA_24x32, 255, 255, 255, 255,        255, 255, 255, 255,        // 24
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 28
    255,       LUMA_32x8, 255,        LUMA_32x16, 255, LUMA_32x24, 255, LUMA_32x32, 255, 255, 255, 255,        255, 255, 255, LUMA_32x64, // 32
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 36
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 40
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 44
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, LUMA_48x64, // 48
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 52
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 56
    255,       255,       255,        255,        255, 255,        255, 255,        255, 255, 255, 255,        255, 255, 255, 255,        // 60
    255,       255,       255,        LUMA_64x16, 255, 255,        255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64  // 64
};
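
/* The table above is indexed by block width (rows) and height (columns) in
 * units of four pixels; 255 marks width/height pairs that are not valid inter
 * partitions.  As an illustrative sketch (the real lookup helper is
 * partitionFromSizes() in primitives.h), the conversion from pixel sizes to a
 * LUMA_WxH enum looks roughly like this:
 *
 *     int w = (width  >> 2) - 1;    // 4 -> row 0, 8 -> row 1, ... 64 -> row 15
 *     int h = (height >> 2) - 1;
 *     int part = (int)lumaPartitionMapTable[(w << 4) + h];
 *     X265_CHECK(part != 255, "invalid block size %dx%d\n", width, height);
 */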

/* the "authoritative" set of encoder primitives */
EncoderPrimitives primitives;

void Setup_C_PixelPrimitives(EncoderPrimitives &p);
void Setup_C_DCTPrimitives(EncoderPrimitives &p);
void Setup_C_IPFilterPrimitives(EncoderPrimitives &p);
void Setup_C_IPredPrimitives(EncoderPrimitives &p);
void Setup_C_LoopFilterPrimitives(EncoderPrimitives &p);

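/* Install the portable C reference implementation of every primitive.  These
 * form a complete, always-available baseline: x265_setup_primitives() below
 * installs them first and then lets the intrinsic/assembly setup routines
 * overwrite individual function pointers, so any primitive without an
 * optimized version falls back to C. */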
void Setup_C_Primitives(EncoderPrimitives &p)
{
    Setup_C_PixelPrimitives(p);      // pixel.cpp
    Setup_C_DCTPrimitives(p);        // dct.cpp
    Setup_C_IPFilterPrimitives(p);   // ipfilter.cpp
    Setup_C_IPredPrimitives(p);      // intrapred.cpp
    Setup_C_LoopFilterPrimitives(p); // loopfilter.cpp
}

void Setup_Alias_Primitives(EncoderPrimitives &p)
{
    /* copy reusable luma primitives to chroma 4:4:4 */
    for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
    {
        p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i];
        p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];
        p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];
        p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];
        p.chroma[X265_CSP_I444].addAvg[i]  = p.luma_addAvg[i];
        p.chroma[X265_CSP_I444].satd[i]    = p.satd[i];
    }

    for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
    {
        p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];
        p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
    }
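
    /* Note: in 4:4:4 the chroma planes have the same dimensions as luma, so
     * the per-partition luma pointers can be aliased one-to-one above; the
     * 4:2:0 and 4:2:2 entries below only reuse luma functions where the
     * chroma block dimensions happen to match a luma partition. */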

    primitives.sa8d[BLOCK_4x4]   = primitives.sa8d_inter[LUMA_4x4];
    primitives.sa8d[BLOCK_8x8]   = primitives.sa8d_inter[LUMA_8x8];
    primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
    primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];
    primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];

    // SA8D devolves to SATD for blocks not even multiples of 8x8
    primitives.sa8d_inter[LUMA_4x4]   = primitives.satd[LUMA_4x4];
    primitives.sa8d_inter[LUMA_4x8]   = primitives.satd[LUMA_4x8];
    primitives.sa8d_inter[LUMA_4x16]  = primitives.satd[LUMA_4x16];
    primitives.sa8d_inter[LUMA_8x4]   = primitives.satd[LUMA_8x4];
    primitives.sa8d_inter[LUMA_16x4]  = primitives.satd[LUMA_16x4];
    primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12];
    primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16];

    // Chroma SATD can often reuse luma primitives
    p.chroma[X265_CSP_I420].satd[CHROMA_4x4]   = primitives.satd[LUMA_4x4];
    p.chroma[X265_CSP_I420].satd[CHROMA_8x8]   = primitives.satd[LUMA_8x8];
    p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16];
    p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32];

    p.chroma[X265_CSP_I420].satd[CHROMA_8x4]   = primitives.satd[LUMA_8x4];
    p.chroma[X265_CSP_I420].satd[CHROMA_4x8]   = primitives.satd[LUMA_4x8];
    p.chroma[X265_CSP_I420].satd[CHROMA_16x8]  = primitives.satd[LUMA_16x8];
    p.chroma[X265_CSP_I420].satd[CHROMA_8x16]  = primitives.satd[LUMA_8x16];
    p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16];
    p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32];

    p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12];
    p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16];
    p.chroma[X265_CSP_I420].satd[CHROMA_16x4]  = primitives.satd[LUMA_16x4];
    p.chroma[X265_CSP_I420].satd[CHROMA_4x16]  = primitives.satd[LUMA_4x16];
    p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24];
    p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32];
    p.chroma[X265_CSP_I420].satd[CHROMA_32x8]  = primitives.satd[LUMA_32x8];
    p.chroma[X265_CSP_I420].satd[CHROMA_8x32]  = primitives.satd[LUMA_8x32];

    p.chroma[X265_CSP_I422].satd[CHROMA422_4x8]   = primitives.satd[LUMA_4x8];
    p.chroma[X265_CSP_I422].satd[CHROMA422_8x16]  = primitives.satd[LUMA_8x16];
    p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32];
    p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64];

    p.chroma[X265_CSP_I422].satd[CHROMA422_4x4]   = primitives.satd[LUMA_4x4];
    p.chroma[X265_CSP_I422].satd[CHROMA422_8x8]   = primitives.satd[LUMA_8x8];
    p.chroma[X265_CSP_I422].satd[CHROMA422_4x16]  = primitives.satd[LUMA_4x16];
    p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16];
    p.chroma[X265_CSP_I422].satd[CHROMA422_8x32]  = primitives.satd[LUMA_8x32];
    p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32];
    p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64];

    //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12]  = satd4<8, 12>;
    p.chroma[X265_CSP_I422].satd[CHROMA422_8x4]   = primitives.satd[LUMA_8x4];
    //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
    //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
    p.chroma[X265_CSP_I422].satd[CHROMA422_16x8]  = primitives.satd[LUMA_16x8];
    //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32]  = satd4<4, 32>;
    //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
    //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
    p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
    //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64]  = satd8<8, 64>;
}
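
/* Usage sketch (not part of this file): analysis code selects a primitive by
 * partition index and calls it through the table, along the lines of
 *
 *     int part = partitionFromSizes(width, height);
 *     int cost = primitives.satd[part](fenc, fencStride, pred, predStride);
 *
 * assuming the pixelcmp_t signature declared in primitives.h. */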
}
using namespace x265;

/* cpuid >= 0 - force CPU type
 * cpuid < 0 - auto-detect if uninitialized */
extern "C"
void x265_setup_primitives(x265_param *param, int cpuid)
{
    if (cpuid < 0)
        cpuid = x265::cpu_detect();

    // initialize global variables
    if (!primitives.sad[0])
    {
        Setup_C_Primitives(primitives);

#if ENABLE_ASSEMBLY
        Setup_Instrinsic_Primitives(primitives, cpuid);
        Setup_Assembly_Primitives(primitives, cpuid);
#else
        x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n");
#endif

        Setup_Alias_Primitives(primitives);
    }

    if (param->logLevel >= X265_LOG_INFO)
    {
        char buf[1000];
        char *p = buf + sprintf(buf, "using cpu capabilities:");
        char *none = p;
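
        /* Walk the CPU flag name table and append each detected capability.
         * The strcmp checks below suppress names implied by a stronger flag
         * (e.g. "SSE" is omitted when SSE2 is present), and the final
         * comparison skips consecutive aliases that share the same flag bits,
         * so each capability is listed at most once. */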
        for (int i = 0; x265::cpu_names[i].flags; i++)
        {
            if (!strcmp(x265::cpu_names[i].name, "SSE")
                && (cpuid & X265_CPU_SSE2))
                continue;
            if (!strcmp(x265::cpu_names[i].name, "SSE2")
                && (cpuid & (X265_CPU_SSE2_IS_FAST | X265_CPU_SSE2_IS_SLOW)))
                continue;
            if (!strcmp(x265::cpu_names[i].name, "SSE3")
                && (cpuid & X265_CPU_SSSE3 || !(cpuid & X265_CPU_CACHELINE_64)))
                continue;
            if (!strcmp(x265::cpu_names[i].name, "SSE4.1")
                && (cpuid & X265_CPU_SSE42))
                continue;
            if (!strcmp(x265::cpu_names[i].name, "BMI1")
                && (cpuid & X265_CPU_BMI2))
                continue;
            if ((cpuid & x265::cpu_names[i].flags) == x265::cpu_names[i].flags
                && (!i || x265::cpu_names[i].flags != x265::cpu_names[i - 1].flags))
                p += sprintf(p, " %s", x265::cpu_names[i].name);
        }

        if (p == none)
            sprintf(p, " none!");
        x265_log(param, X265_LOG_INFO, "%s\n", buf);
    }
}
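
/* Typical call site (illustrative, not part of this file): an encoder
 * instance sets up the primitive tables once during initialization, e.g.
 *
 *     x265_setup_primitives(param, param->cpuid);
 *
 * passing a non-negative X265_CPU_* mask to force a CPU type, or a negative
 * value to auto-detect as described in the comment above the function. */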

#if ENABLE_ASSEMBLY
/* these functions are implemented in assembly. When assembly is not being
 * compiled, they are unnecessary and can be NOPs */
#else
extern "C" {
int x265_cpu_cpuid_test(void) { return 0; }
void x265_cpu_emms(void) {}
void x265_cpu_cpuid(uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *) {}
void x265_cpu_xgetbv(uint32_t, uint32_t *, uint32_t *) {}
}
#endif