1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
25 #include "primitives.h"
28 // x265 private namespace
30 extern const uint8_t lumaPartitionMapTable
[] =
32 // 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64
33 LUMA_4x4
, LUMA_4x8
, 255, LUMA_4x16
, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 4
34 LUMA_8x4
, LUMA_8x8
, 255, LUMA_8x16
, 255, 255, 255, LUMA_8x32
, 255, 255, 255, 255, 255, 255, 255, 255, // 8
35 255, 255, 255, LUMA_12x16
, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 12
36 LUMA_16x4
, LUMA_16x8
, LUMA_16x12
, LUMA_16x16
, 255, 255, 255, LUMA_16x32
, 255, 255, 255, 255, 255, 255, 255, LUMA_16x64
, // 16
37 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 20
38 255, 255, 255, 255, 255, 255, 255, LUMA_24x32
, 255, 255, 255, 255, 255, 255, 255, 255, // 24
39 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 28
40 255, LUMA_32x8
, 255, LUMA_32x16
, 255, LUMA_32x24
, 255, LUMA_32x32
, 255, 255, 255, 255, 255, 255, 255, LUMA_32x64
, // 32
41 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 36
42 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 40
43 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 44
44 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, LUMA_48x64
, // 48
45 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 52
46 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 56
47 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 60
48 255, 255, 255, LUMA_64x16
, 255, 255, 255, LUMA_64x32
, 255, 255, 255, LUMA_64x48
, 255, 255, 255, LUMA_64x64
// 64
51 /* the "authoritative" set of encoder primitives */
52 EncoderPrimitives primitives
;
54 void Setup_C_PixelPrimitives(EncoderPrimitives
&p
);
55 void Setup_C_DCTPrimitives(EncoderPrimitives
&p
);
56 void Setup_C_IPFilterPrimitives(EncoderPrimitives
&p
);
57 void Setup_C_IPredPrimitives(EncoderPrimitives
&p
);
58 void Setup_C_LoopFilterPrimitives(EncoderPrimitives
&p
);
60 void Setup_C_Primitives(EncoderPrimitives
&p
)
62 Setup_C_PixelPrimitives(p
); // pixel.cpp
63 Setup_C_DCTPrimitives(p
); // dct.cpp
64 Setup_C_IPFilterPrimitives(p
); // ipfilter.cpp
65 Setup_C_IPredPrimitives(p
); // intrapred.cpp
66 Setup_C_LoopFilterPrimitives(p
); // loopfilter.cpp
69 void Setup_Alias_Primitives(EncoderPrimitives
&p
)
71 /* copy reusable luma primitives to chroma 4:4:4 */
72 for (int i
= 0; i
< NUM_LUMA_PARTITIONS
; i
++)
74 p
.chroma
[X265_CSP_I444
].copy_pp
[i
] = p
.luma_copy_pp
[i
];
75 p
.chroma
[X265_CSP_I444
].copy_ps
[i
] = p
.luma_copy_ps
[i
];
76 p
.chroma
[X265_CSP_I444
].copy_sp
[i
] = p
.luma_copy_sp
[i
];
77 p
.chroma
[X265_CSP_I444
].copy_ss
[i
] = p
.luma_copy_ss
[i
];
78 p
.chroma
[X265_CSP_I444
].addAvg
[i
] = p
.luma_addAvg
[i
];
79 p
.chroma
[X265_CSP_I444
].satd
[i
] = p
.satd
[i
];
82 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
84 p
.chroma
[X265_CSP_I444
].add_ps
[i
] = p
.luma_add_ps
[i
];
85 p
.chroma
[X265_CSP_I444
].sub_ps
[i
] = p
.luma_sub_ps
[i
];
88 primitives
.sa8d
[BLOCK_4x4
] = primitives
.sa8d_inter
[LUMA_4x4
];
89 primitives
.sa8d
[BLOCK_8x8
] = primitives
.sa8d_inter
[LUMA_8x8
];
90 primitives
.sa8d
[BLOCK_16x16
] = primitives
.sa8d_inter
[LUMA_16x16
];
91 primitives
.sa8d
[BLOCK_32x32
] = primitives
.sa8d_inter
[LUMA_32x32
];
92 primitives
.sa8d
[BLOCK_64x64
] = primitives
.sa8d_inter
[LUMA_64x64
];
94 // SA8D devolves to SATD for blocks not even multiples of 8x8
95 primitives
.sa8d_inter
[LUMA_4x4
] = primitives
.satd
[LUMA_4x4
];
96 primitives
.sa8d_inter
[LUMA_4x8
] = primitives
.satd
[LUMA_4x8
];
97 primitives
.sa8d_inter
[LUMA_4x16
] = primitives
.satd
[LUMA_4x16
];
98 primitives
.sa8d_inter
[LUMA_8x4
] = primitives
.satd
[LUMA_8x4
];
99 primitives
.sa8d_inter
[LUMA_16x4
] = primitives
.satd
[LUMA_16x4
];
100 primitives
.sa8d_inter
[LUMA_16x12
] = primitives
.satd
[LUMA_16x12
];
101 primitives
.sa8d_inter
[LUMA_12x16
] = primitives
.satd
[LUMA_12x16
];
103 // Chroma SATD can often reuse luma primitives
104 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_4x4
] = primitives
.satd
[LUMA_4x4
];
105 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_8x8
] = primitives
.satd
[LUMA_8x8
];
106 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_16x16
] = primitives
.satd
[LUMA_16x16
];
107 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_32x32
] = primitives
.satd
[LUMA_32x32
];
109 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_8x4
] = primitives
.satd
[LUMA_8x4
];
110 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_4x8
] = primitives
.satd
[LUMA_4x8
];
111 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_16x8
] = primitives
.satd
[LUMA_16x8
];
112 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_8x16
] = primitives
.satd
[LUMA_8x16
];
113 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_32x16
] = primitives
.satd
[LUMA_32x16
];
114 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_16x32
] = primitives
.satd
[LUMA_16x32
];
116 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_16x12
] = primitives
.satd
[LUMA_16x12
];
117 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_12x16
] = primitives
.satd
[LUMA_12x16
];
118 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_16x4
] = primitives
.satd
[LUMA_16x4
];
119 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_4x16
] = primitives
.satd
[LUMA_4x16
];
120 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_32x24
] = primitives
.satd
[LUMA_32x24
];
121 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_24x32
] = primitives
.satd
[LUMA_24x32
];
122 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_32x8
] = primitives
.satd
[LUMA_32x8
];
123 p
.chroma
[X265_CSP_I420
].satd
[CHROMA_8x32
] = primitives
.satd
[LUMA_8x32
];
125 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_4x8
] = primitives
.satd
[LUMA_4x8
];
126 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_8x16
] = primitives
.satd
[LUMA_8x16
];
127 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_16x32
] = primitives
.satd
[LUMA_16x32
];
128 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_32x64
] = primitives
.satd
[LUMA_32x64
];
130 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_4x4
] = primitives
.satd
[LUMA_4x4
];
131 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_8x8
] = primitives
.satd
[LUMA_8x8
];
132 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_4x16
] = primitives
.satd
[LUMA_4x16
];
133 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_16x16
] = primitives
.satd
[LUMA_16x16
];
134 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_8x32
] = primitives
.satd
[LUMA_8x32
];
135 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_32x32
] = primitives
.satd
[LUMA_32x32
];
136 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_16x64
] = primitives
.satd
[LUMA_16x64
];
138 //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
139 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_8x4
] = primitives
.satd
[LUMA_8x4
];
140 //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
141 //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
142 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_16x8
] = primitives
.satd
[LUMA_16x8
];
143 //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
144 //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
145 //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
146 p
.chroma
[X265_CSP_I422
].satd
[CHROMA422_32x16
] = primitives
.satd
[LUMA_32x16
];
147 //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
150 using namespace x265
;
152 /* cpuid >= 0 - force CPU type
153 * cpuid < 0 - auto-detect if uninitialized */
155 void x265_setup_primitives(x265_param
*param
, int cpuid
)
158 cpuid
= x265::cpu_detect();
160 // initialize global variables
161 if (!primitives
.sad
[0])
163 Setup_C_Primitives(primitives
);
166 Setup_Instrinsic_Primitives(primitives
, cpuid
);
167 Setup_Assembly_Primitives(primitives
, cpuid
);
169 x265_log(param
, X265_LOG_WARNING
, "Assembly not supported in this binary\n");
172 Setup_Alias_Primitives(primitives
);
175 if (param
->logLevel
>= X265_LOG_INFO
)
178 char *p
= buf
+ sprintf(buf
, "using cpu capabilities:");
180 for (int i
= 0; x265::cpu_names
[i
].flags
; i
++)
182 if (!strcmp(x265::cpu_names
[i
].name
, "SSE")
183 && (cpuid
& X265_CPU_SSE2
))
185 if (!strcmp(x265::cpu_names
[i
].name
, "SSE2")
186 && (cpuid
& (X265_CPU_SSE2_IS_FAST
| X265_CPU_SSE2_IS_SLOW
)))
188 if (!strcmp(x265::cpu_names
[i
].name
, "SSE3")
189 && (cpuid
& X265_CPU_SSSE3
|| !(cpuid
& X265_CPU_CACHELINE_64
)))
191 if (!strcmp(x265::cpu_names
[i
].name
, "SSE4.1")
192 && (cpuid
& X265_CPU_SSE42
))
194 if (!strcmp(x265::cpu_names
[i
].name
, "BMI1")
195 && (cpuid
& X265_CPU_BMI2
))
197 if ((cpuid
& x265::cpu_names
[i
].flags
) == x265::cpu_names
[i
].flags
198 && (!i
|| x265::cpu_names
[i
].flags
!= x265::cpu_names
[i
- 1].flags
))
199 p
+= sprintf(p
, " %s", x265::cpu_names
[i
].name
);
203 sprintf(p
, " none!");
204 x265_log(param
, X265_LOG_INFO
, "%s\n", buf
);
/* Stand-ins for routines that are otherwise implemented in assembly.  When
 * assembly is not being compiled the real versions are unnecessary, so these
 * do nothing: the cpuid test reports 0 and the others leave their pointer
 * arguments untouched. */
int x265_cpu_cpuid_test(void)
{
    return 0;
}

void x265_cpu_emms(void)
{
}

void x265_cpu_cpuid(uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *)
{
}

void x265_cpu_xgetbv(uint32_t, uint32_t *, uint32_t *)
{
}