Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;****************************************************************************** |
2 | ;* Copyright (c) 2012 Michael Niedermayer | |
3 | ;* | |
4 | ;* This file is part of FFmpeg. | |
5 | ;* | |
6 | ;* FFmpeg is free software; you can redistribute it and/or | |
7 | ;* modify it under the terms of the GNU Lesser General Public | |
8 | ;* License as published by the Free Software Foundation; either | |
9 | ;* version 2.1 of the License, or (at your option) any later version. | |
10 | ;* | |
11 | ;* FFmpeg is distributed in the hope that it will be useful, | |
12 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ;* Lesser General Public License for more details. | |
15 | ;* | |
16 | ;* You should have received a copy of the GNU Lesser General Public | |
17 | ;* License along with FFmpeg; if not, write to the Free Software | |
18 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | ;****************************************************************************** | |
20 | ||
21 | %include "libavutil/x86/x86util.asm" | |
22 | ||
SECTION_RODATA 32

; Float scale factors. Each one is replicated 8 times (32 bytes) so the same
; label can fill either a 16-byte or a 32-byte vector register.
flt2pm31:           times 8 dd 4.6566129e-10    ; 2^-31
flt2p31 :           times 8 dd 2147483648.0     ; 2^31
flt2p15 :           times 8 dd 32768.0          ; 2^15

; pshufb control: gathers the even-numbered 16-bit words of a vector into its
; low 8 bytes and the odd-numbered words into its high 8 bytes.
word_unpack_shuf :  db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text
31 | ||
32 | ||
;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits pack_2ch_<from>_to_<to>_<a|u>: interleaves two planar channels into
; one packed buffer, converting the sample format on the way.
;
;   %1  output sample format name (only used to build the symbol name)
;   %2  input sample format name  (only used to build the symbol name)
;   %3  'a' = aligned loads/stores, 'u' = unaligned loads/stores
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  macro invoked every iteration to convert the loaded vectors
;   %7  macro invoked once before the loop (e.g. to load a scale constant)
;
; Arguments of the generated function:
;   dst  pointer to an array of pointers; entry 0 is the packed output buffer
;   src  pointer to an array of pointers; entries 0 and 1 are the two planes
;   len  number of samples per channel
;
; The 'a' variant tests all three buffers and jumps into the body of the
; matching 'u' variant when any of them is not mmsize-aligned, so the 'u'
; variant has to be instantiated first for the same instruction set.
; There is no tail loop: whole vectors are processed until the (negative)
; sample index reaches zero.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov         src2q, [srcq+gprsize]       ; second plane; read before srcq is replaced
    mov         srcq , [srcq]               ; first plane
    mov         dstq , [dstq]               ; packed output
%ifidn %3, a
    test        dstq, mmsize-1
    jne         pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test        srcq, mmsize-1
    jne         pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test        src2q, mmsize-1
    jne         pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant as its unaligned fallback.
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point every buffer at its end and count a negative index up to zero.
    lea         srcq , [srcq  + (1<<%5)*lenq]
    lea         src2q, [src2q + (1<<%5)*lenq]
    lea         dstq , [dstq  + (2<<%4)*lenq]
    neg         lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; Output samples are at least as wide as input samples:
    ; interleave the two planes first, then convert the interleaved vectors.
    mov%3       m0, [         srcq +(1<<%5)*lenq]
    mova        m1, m0
    mov%3       m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd   m0, m2
    punpckhwd   m1, m2
%else
    punpckldq   m0, m2
    punpckhdq   m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; Output samples are narrower: convert two vectors per plane first
    ; (plane 0 ends up in m0, plane 1 in m1), then interleave the words.
    mov%3       m0, [         srcq +(1<<%5)*lenq]
    mov%3       m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3       m2, [         src2q+(1<<%5)*lenq]
    mov%3       m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova        m2, m0
    punpcklwd   m0, m1
    punpckhwd   m2, m1
    SWAP 1,2
%endif
    mov%3       [           dstq+(2<<%4)*lenq], m0
    mov%3       [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    ; A widening conversion produced four output vectors instead of two.
    mov%3       [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3       [3*mmsize + dstq+(2<<%4)*lenq], m3
    add         lenq, 4*mmsize/(2<<%4)
%else
    add         lenq, 2*mmsize/(2<<%4)
%endif
    jl          .next
    REP_RET
%endmacro
90 | ||
;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits unpack_2ch_<from>_to_<to>_<a|u>: splits one packed two-channel buffer
; into two planar buffers, converting the sample format on the way.
;
;   %1  output sample format name (only used to build the symbol name)
;   %2  input sample format name  (only used to build the symbol name)
;   %3  'a' = aligned loads/stores, 'u' = unaligned loads/stores
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  macro invoked every iteration to convert the de-interleaved vectors
;   %7  macro invoked once before the loop (e.g. to load a scale constant)
;
; Arguments of the generated function:
;   dst  pointer to an array of pointers; entries 0 and 1 are the two planes
;   src  pointer to an array of pointers; entry 0 is the packed input buffer
;   len  number of samples per channel
;
; The 'a' variant tests all three buffers and jumps into the body of the
; matching 'u' variant when any of them is not mmsize-aligned, so the 'u'
; variant has to be instantiated first for the same instruction set.
; There is no tail loop: whole vectors are processed until the (negative)
; sample index reaches zero.
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov         dst2q, [dstq+gprsize]       ; second plane; read before dstq is replaced
    mov         srcq , [srcq]               ; packed input
    mov         dstq , [dstq]               ; first plane
%ifidn %3, a
    test        dstq, mmsize-1
    jne         unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test        srcq, mmsize-1
    jne         unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test        dst2q, mmsize-1
    jne         unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant as its unaligned fallback.
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point every buffer at its end and count a negative index up to zero.
    lea         srcq , [srcq  + (2<<%5)*lenq]
    lea         dstq , [dstq  + (1<<%4)*lenq]
    lea         dst2q, [dst2q + (1<<%4)*lenq]
    neg         lenq
    %7 m0,m1,m2,m3,m4,m5
    mova        m6, [word_unpack_shuf]      ; only consumed by the ssse3 16-bit path
.next:
    mov%3       m0, [           srcq +(2<<%5)*lenq]
    mov%3       m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
    ; 16-bit input: channel 0 ends up in m0, channel 1 in m1.
%ifidn SUFFIX, _ssse3
    pshufb      m0, m6
    mova        m1, m0
    pshufb      m2, m6
    punpcklqdq  m0, m2
    punpckhqdq  m1, m2
%else
    ; Without pshufb, three word-interleave passes perform the same split.
    mova        m1, m0
    punpcklwd   m0, m2
    punpckhwd   m1, m2

    mova        m2, m0
    punpcklwd   m0, m1
    punpckhwd   m2, m1

    mova        m1, m0
    punpcklwd   m0, m2
    punpckhwd   m1, m2
%endif
%else
    ; 32-bit input: even dwords (channel 0) -> m0, odd dwords (channel 1) -> m1.
    mova        m1, m0
    shufps      m0, m2, 10001000b
    shufps      m1, m2, 11011101b
%endif
%if %4 < %5
    ; A narrowing conversion consumes two vectors per channel: split a second
    ; pair of input vectors, then rename so m0/m1 hold channel 0 and m2/m3
    ; hold channel 1.
    mov%3       m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova        m3, m2
    mov%3       m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps      m2, m4, 10001000b
    shufps      m3, m4, 11011101b
    SWAP 1,2
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3       [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    ; A widening conversion leaves channel 0 in m0/m1 and channel 1 in m2/m3.
    mov%3       [          dst2q+(1<<%4)*lenq], m2
    mov%3       [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3       [ mmsize + dst2q+(1<<%4)*lenq], m3
    add         lenq, 2*mmsize/(1<<%4)
%else
    mov%3       [          dst2q+(1<<%4)*lenq], m1
    add         lenq, mmsize/(1<<%4)
%endif
    jl          .next
    REP_RET
%endmacro
162 | ||
;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits <from>_to_<to>_<a|u>: converts one buffer of samples from one sample
; format to another.
;
;   %1  output sample format name (only used to build the symbol name)
;   %2  input sample format name  (only used to build the symbol name)
;   %3  'a' = aligned loads/stores, 'u' = unaligned loads/stores
;   %4  log2 of the output sample size in bytes
;   %5  log2 of the input sample size in bytes
;   %6  macro invoked every iteration to convert the loaded vectors
;   %7  macro invoked once before the loop (e.g. to load a scale constant)
;
; Arguments of the generated function:
;   dst  pointer to an array of pointers; entry 0 is the output buffer
;   src  pointer to an array of pointers; entry 0 is the input buffer
;   len  number of samples
;
; The 'a' variant tests both buffers and jumps into the body of the matching
; 'u' variant when either is not mmsize-aligned, so the 'u' variant has to be
; instantiated first for the same instruction set.
; There is no tail loop: whole vectors are processed until the (negative)
; sample index reaches zero.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov         srcq, [srcq]
    mov         dstq, [dstq]
%ifidn %3, a
    test        dstq, mmsize-1
    jne         %2_to_%1_u_int %+ SUFFIX
    test        srcq, mmsize-1
    jne         %2_to_%1_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant as its unaligned fallback.
%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point both buffers at their end and count a negative index up to zero.
    lea         srcq, [srcq + (1<<%5)*lenq]
    lea         dstq, [dstq + (1<<%4)*lenq]
    neg         lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3       m0, [           srcq +(1<<%5)*lenq]
    mov%3       m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    ; A narrowing conversion consumes four input vectors per iteration.
    mov%3       m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3       m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3       [           dstq+(1<<%4)*lenq], m0
    mov%3       [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    ; A widening conversion produces four output vectors per iteration.
    mov%3       [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3       [3*mmsize + dstq+(1<<%4)*lenq], m3
    add         lenq, 4*mmsize/(1<<%4)
%else
    add         lenq, 2*mmsize/(1<<%4)
%endif
    jl          .next
%if mmsize == 8
    emms                                    ; leave MMX state before returning
    RET
%else
    REP_RET
%endif
%endmacro
204 | ||
;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits pack_6ch_<from>_to_<to>_<a|u>: interleaves six planar channels of
; 32-bit samples into one packed buffer, optionally converting the samples.
;
;   %1  output sample format name (only used to build the symbol name)
;   %2  input sample format name  (only used to build the symbol name)
;   %3  'a' = aligned loads/stores, 'u' = unaligned loads/stores
;   %4  log2 of the output sample size (not referenced; 4-byte samples are
;       hard-coded in the body)
;   %5  log2 of the input sample size  (not referenced, see %4)
;   %6  macro invoked on each pair of interleaved vectors (SSE and later only;
;       the MMX body performs no conversion)
;   %7  macro invoked once before the loop to set up the constant in m7
;
; Arguments of the generated function:
;   dst  pointer to an array of pointers; entry 0 is the packed output buffer
;   src  pointer to an array of pointers; entries 0..5 are the six planes
;   len  number of samples per channel (third argument, read as 32 bits)
;
; The 'a' variant tests the output and all six planes and jumps into the body
; of the matching 'u' variant when any of them is not mmsize-aligned, so the
; 'u' variant has to be instantiated first for the same instruction set.
;
; Eight vector registers are declared because m7 holds the conversion constant
; and m6 is scratch.
;------------------------------------------------------------------------------
%macro PACK_6CH 5-7
cglobal pack_6ch_%2_to_%1_%3, 2, 8, 8, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov         lend, r2d                   ; r2 is reused for src1 below
%else
    %define lend dword r2m                  ; keep the counter in its stack slot
%endif
    mov         src1q, [srcq+1*gprsize]
    mov         src2q, [srcq+2*gprsize]
    mov         src3q, [srcq+3*gprsize]
    mov         src4q, [srcq+4*gprsize]
    mov         src5q, [srcq+5*gprsize]
    mov         srcq , [srcq]
    mov         dstq , [dstq]
%ifidn %3, a
    test        dstq, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        srcq, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        src1q, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        src2q, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        src3q, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        src4q, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test        src5q, mmsize-1
    jne         pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant as its unaligned fallback.
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn planes 1..5 into offsets from plane 0 so that only srcq has to be
    ; advanced inside the loop.
    sub         src1q, srcq
    sub         src2q, srcq
    sub         src3q, srcq
    sub         src4q, srcq
    sub         src5q, srcq
    ; Loop-invariant setup: nothing in the loop writes m7.
    %7 x,x,x,x,m7,x
.loop:
    mov%3       m0, [srcq      ]
    mov%3       m1, [srcq+src1q]
    mov%3       m2, [srcq+src2q]
    mov%3       m3, [srcq+src3q]
    mov%3       m4, [srcq+src4q]
    mov%3       m5, [srcq+src5q]
%if cpuflag(sse)
    ; Interleave channel pairs (0,1), (2,3), (4,5) ...
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

    ; ... then stitch the pairs into six-channel frames. The stores below
    ; write m0, m6, m4 (frames 0-1) followed by m1, m2, m5 (frames 2-3).
%if cpuflag(avx)
    blendps     m6, m4, m0, 1100b
%else
    movaps      m6, m4
    shufps      m4, m0, q3210
    SWAP 4,6
%endif
    movlhps     m0, m2
    movhlps     m4, m2
%if cpuflag(avx)
    blendps     m2, m5, m1, 1100b
%else
    movaps      m2, m5
    shufps      m5, m1, q3210
    SWAP 2,5
%endif
    movlhps     m1, m3
    movhlps     m5, m3

    ; Convert two vectors at a time: m7 is the constant, m3 is free scratch.
    %6 m0,m6,x,x,m7,m3
    %6 m4,m1,x,x,m7,m3
    %6 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    ; Two samples per channel per iteration; plain interleave, no conversion.
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq        [dstq   ], m0
    movq        [dstq+ 8], m2
    movq        [dstq+16], m4
    movq        [dstq+24], m1
    movq        [dstq+32], m3
    movq        [dstq+40], m5
%endif
    add         srcq, mmsize
    add         dstq, mmsize*6
    sub         lend, mmsize/4              ; mmsize/4 samples per channel done
    jg          .loop
%if mmsize == 8
    emms                                    ; leave MMX state before returning
    RET
%else
    REP_RET
%endif
%endmacro
305 | ||
; INT16_TO_INT32_N: widen the 16-bit samples held in m0 and m1 to 32 bits.
; Every sample lands in the upper word of its dword with a zero lower word,
; i.e. it is shifted left by 16. On exit m0/m1 hold the low/high half of the
; former m0 and m2/m3 the low/high half of the former m1. m4 is clobbered, and
; the SWAP renaming stays in effect for the code that follows the macro.
; The six macro arguments are accepted for interface compatibility but unused.
%macro INT16_TO_INT32_N 6
    ; second input vector -> m2 (low half), m3 (high half)
    pxor        m2, m2
    punpcklwd   m2, m1
    pxor        m3, m3
    punpckhwd   m3, m1
    ; first input vector is renamed to m4 so that m0/m1 can take the results
    SWAP 4,0
    pxor        m0, m0
    punpcklwd   m0, m4
    pxor        m1, m1
    punpckhwd   m1, m4
%endmacro
317 | ||
; INT32_TO_INT16_N: narrow the 32-bit samples in m0..m3 to 16 bits by keeping
; the upper word of each dword (arithmetic shift right by 16, then pack).
; On exit m0 holds the former m0:m1 and, after the SWAP renaming, m1 holds the
; former m2:m3. The six macro arguments are unused.
%macro INT32_TO_INT16_N 6
    psrad       m0, 16
    psrad       m1, 16
    packssdw    m0, m1
    psrad       m2, 16
    psrad       m3, 16
    packssdw    m2, m3
    SWAP 1,2
%endmacro
327 | ||
; INT32_TO_FLOAT_INIT: load the 2^-31 scale factor into the register given as
; the fifth argument.
%macro INT32_TO_FLOAT_INIT 6
    mova        %5, [flt2pm31]
%endmacro

; INT32_TO_FLOAT_N: convert the int32 samples in %1 and %2 to float in place
; and scale them by the constant held in %5.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps    %1, %1
    mulps       %1, %1, %5
    cvtdq2ps    %2, %2
    mulps       %2, %2, %5
%endmacro
337 | ||
; FLOAT_TO_INT32_INIT: load the 2^31 scale factor into the register given as
; the fifth argument.
%macro FLOAT_TO_INT32_INIT 6
    mova        %5, [flt2p31]
%endmacro

; FLOAT_TO_INT32_N: convert the float samples in %1 and %2 to int32 in place.
; %5 holds 2^31 and %6 is a scratch register.
; cvtps2dq produces 0x80000000 for out-of-range inputs. The compare
; (predicate 5, "not less than") yields all ones, i.e. -1, in every lane whose
; scaled value is not below 2^31; adding that to the converted value turns
; 0x80000000 into 0x7fffffff for those lanes.
%macro FLOAT_TO_INT32_N 6
    mulps       %1, %5
    cvtps2dq    %6, %1
    cmpps       %1, %1, %5, 5
    paddd       %1, %6

    mulps       %2, %5
    cvtps2dq    %6, %2
    cmpps       %2, %2, %5, 5
    paddd       %2, %6
%endmacro
351 | ||
; INT16_TO_FLOAT_INIT: load the 2^-31 scale factor into m5. The register is
; fixed; the macro arguments are ignored.
%macro INT16_TO_FLOAT_INIT 6
    mova        m5, [flt2pm31]
%endmacro

; INT16_TO_FLOAT_N: convert the int16 samples in m0 and m1 to float in
; m0..m3. The samples are first widened with INT16_TO_INT32_N, which leaves
; them shifted left by 16, and are then scaled by the 2^-31 constant in m5.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps    m0, m0
    mulps       m0, m0, m5
    cvtdq2ps    m1, m1
    mulps       m1, m1, m5
    cvtdq2ps    m2, m2
    mulps       m2, m2, m5
    cvtdq2ps    m3, m3
    mulps       m3, m3, m5
%endmacro
366 | ||
; FLOAT_TO_INT16_INIT: load the 2^15 scale factor into m5. The register is
; fixed; the macro arguments are ignored.
%macro FLOAT_TO_INT16_INIT 6
    mova        m5, [flt2p15]
%endmacro

; FLOAT_TO_INT16_N: convert the float samples in m0..m3 to int16. The values
; are scaled by the constant in m5, converted to int32 and packed with signed
; saturation. On exit m0 holds the former m0:m1 and m1 the former m2:m3.
; The six macro arguments are unused.
%macro FLOAT_TO_INT16_N 6
    ; first pair -> m0
    mulps       m0, m5
    mulps       m1, m5
    cvtps2dq    m0, m0
    cvtps2dq    m1, m1
    packssdw    m0, m1
    ; second pair -> m1
    mulps       m2, m5
    mulps       m3, m5
    cvtps2dq    m1, m2
    cvtps2dq    m3, m3
    packssdw    m1, m3
%endmacro
382 | ||
; NOP_N: empty placeholder passed as the conversion and/or init macro when no
; work is needed. Accepts up to six arguments and emits nothing.
%macro NOP_N 0-6
%endmacro
385 | ||
;------------------------------------------------------------------------------
; Function instantiations, grouped by instruction set.
; Within each group the 'u' variant always precedes its 'a' twin, because the
; aligned variant jumps into the unaligned one when a buffer is misaligned.
;------------------------------------------------------------------------------

; ---- MMX --------------------------------------------------------------------
INIT_MMX mmx
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

; ---- SSE --------------------------------------------------------------------
INIT_XMM sse
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

; ---- SSE2 -------------------------------------------------------------------
INIT_XMM sse2
; integer <-> integer
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

; integer <-> float
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

; ---- SSSE3 (pshufb-based de-interleave of 16-bit input) -----------------------
INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

; ---- AVX --------------------------------------------------------------------
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_YMM avx
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
%endif

; ---- AVX2 (FLOAT_TO_INT32_N uses the integer add paddd on full vectors) -------
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
%endif