Imported Debian version 2.5.0~trusty1.1
[deb_ffmpeg.git] / ffmpeg / libswresample / x86 / audio_convert.asm
CommitLineData
2ba45a60
DM
1;******************************************************************************
2;* Copyright (c) 2012 Michael Niedermayer
3;*
4;* This file is part of FFmpeg.
5;*
6;* FFmpeg is free software; you can redistribute it and/or
7;* modify it under the terms of the GNU Lesser General Public
8;* License as published by the Free Software Foundation; either
9;* version 2.1 of the License, or (at your option) any later version.
10;*
11;* FFmpeg is distributed in the hope that it will be useful,
12;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14;* Lesser General Public License for more details.
15;*
16;* You should have received a copy of the GNU Lesser General Public
17;* License along with FFmpeg; if not, write to the Free Software
18;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
; Read-only constants used by the conversion macros below.
; Each float constant is replicated 8 times (8 dwords = 32 bytes), so it can
; be loaded with a single full-register mova for any register width used in
; this file.
23SECTION_RODATA 32
; 2^-31: scale factor applied after cvtdq2ps when converting int32 to float.
24flt2pm31: times 8 dd 4.6566129e-10
; 2^31: scale factor applied before cvtps2dq when converting float to int32.
25flt2p31 : times 8 dd 2147483648.0
; 2^15: scale factor applied before cvtps2dq when converting float to int16.
26flt2p15 : times 8 dd 32768.0
27
; pshufb control mask: moves the even-indexed 16-bit words of a register to
; its low 8 bytes and the odd-indexed words to its high 8 bytes.
28word_unpack_shuf : db 0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
29
30SECTION .text
31
32
;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits pack_2ch_<from>_to_<to>_<a|u>: interleaves two planar channels into a
; single packed stream, converting the sample format on the way.
;
; Macro parameters:
;   %1 to            output sample format (only used in the symbol name)
;   %2 from          input sample format (only used in the symbol name)
;   %3 a/u           a = aligned loads/stores (mova), u = unaligned (movu)
;   %4 log2_outsize  log2 of the output sample size in bytes
;   %5 log2_insize   log2 of the input sample size in bytes
;   %6 conv          conversion macro, invoked once per iteration on m0..m5
;   %7 init          setup macro, invoked once before the loop on m0..m5
;
; Arguments of the emitted function:
;   dst  pointer to an array of plane pointers; only dst[0] is used
;   src  pointer to an array of plane pointers; src[0] and src[1] are used
;   len  number of samples per channel
;
; The loop body always runs before len is tested and advances in whole
; register-sized chunks, so callers presumably pass len > 0 and a buffer
; padded to the chunk size -- TODO confirm against the C caller.
;
; The aligned variant branches to the label <name>_u_int<SUFFIX>, which is
; emitted by the unaligned variant; both must be instantiated under the same
; INIT_* so the label exists.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov     src2q, [srcq+gprsize]           ; src2 = src[1]
    mov     srcq,  [srcq]                   ; src  = src[0]
    mov     dstq,  [dstq]                   ; dst  = dst[0]
%ifidn %3, a
    ; Any pointer that is not mmsize-aligned: continue in the unaligned body.
    test    dstq,  mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq,  mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point all buffers at their end and count a negative index up to zero.
    lea     srcq,  [srcq  + (1<<%5)*lenq]
    lea     src2q, [src2q + (1<<%5)*lenq]
    lea     dstq,  [dstq  + (2<<%4)*lenq]
    neg     lenq
    %7      m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; Output samples are at least as wide as input samples:
    ; interleave first, then convert.
    mov%3   m0, [srcq  + (1<<%5)*lenq]
    mova    m1, m0
    mov%3   m2, [src2q + (1<<%5)*lenq]
%if %5 == 1
    punpcklwd m0, m2
    punpckhwd m1, m2
%else
    punpckldq m0, m2
    punpckhdq m1, m2
%endif
    %6      m0,m1,m2,m3,m4,m5
%else
    ; Output samples are narrower than input samples:
    ; convert each channel first, then interleave the 16-bit results.
    mov%3   m0, [         srcq  + (1<<%5)*lenq]
    mov%3   m1, [mmsize + srcq  + (1<<%5)*lenq]
    mov%3   m2, [         src2q + (1<<%5)*lenq]
    mov%3   m3, [mmsize + src2q + (1<<%5)*lenq]
    %6      m0,m1,m2,m3,m4,m5
    mova    m2, m0
    punpcklwd m0, m1
    punpckhwd m2, m1
    SWAP 1,2
%endif
    mov%3   [           dstq + (2<<%4)*lenq], m0
    mov%3   [  mmsize + dstq + (2<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversion produced four registers of output.
    mov%3   [2*mmsize + dstq + (2<<%4)*lenq], m2
    mov%3   [3*mmsize + dstq + (2<<%4)*lenq], m3
    add     lenq, 4*mmsize/(2<<%4)
%else
    add     lenq, 2*mmsize/(2<<%4)
%endif
    jl      .next
    REP_RET
%endmacro
90
;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits unpack_2ch_<from>_to_<to>_<a|u>: splits one packed 2-channel stream
; into two planar channels, converting the sample format on the way.
;
; Macro parameters:
;   %1 to            output sample format (only used in the symbol name)
;   %2 from          input sample format (only used in the symbol name)
;   %3 a/u           a = aligned loads/stores (mova), u = unaligned (movu)
;   %4 log2_outsize  log2 of the output sample size in bytes
;   %5 log2_insize   log2 of the input sample size in bytes
;   %6 conv          conversion macro, invoked once per iteration on m0..m5
;   %7 init          setup macro, invoked once before the loop on m0..m5
;
; Arguments of the emitted function:
;   dst  pointer to an array of plane pointers; dst[0] and dst[1] are used
;   src  pointer to an array of plane pointers; only src[0] is used
;   len  number of samples per channel
;
; The loop body always runs before len is tested and advances in whole
; register-sized chunks, so callers presumably pass len > 0 and a buffer
; padded to the chunk size -- TODO confirm against the C caller.
;
; The aligned variant branches to the label <name>_u_int<SUFFIX>, which is
; emitted by the unaligned variant; both must be instantiated under the same
; INIT_* so the label exists.
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov     dst2q, [dstq+gprsize]           ; dst2 = dst[1]
    mov     srcq,  [srcq]                   ; src  = src[0]
    mov     dstq,  [dstq]                   ; dst  = dst[0]
%ifidn %3, a
    ; Any pointer that is not mmsize-aligned: continue in the unaligned body.
    test    dstq,  mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq,  mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point all buffers at their end and count a negative index up to zero.
    lea     srcq,  [srcq  + (2<<%5)*lenq]
    lea     dstq,  [dstq  + (1<<%4)*lenq]
    lea     dst2q, [dst2q + (1<<%4)*lenq]
    neg     lenq
    %7      m0,m1,m2,m3,m4,m5
    mova    m6, [word_unpack_shuf]          ; only consumed by the ssse3 path
.next:
    mov%3   m0, [           srcq + (2<<%5)*lenq]
    mov%3   m2, [  mmsize + srcq + (2<<%5)*lenq]
%if %5 == 1
    ; 16-bit input: gather even words (channel 0) in m0, odd words in m1.
%ifidn SUFFIX, _ssse3
    pshufb  m0, m6
    mova    m1, m0
    pshufb  m2, m6
    punpcklqdq m0, m2
    punpckhqdq m1, m2
%else
    ; Without pshufb: three rounds of word interleaving sort the words so
    ; that m0 ends up with channel 0 and m1 with channel 1.
    mova    m1, m0
    punpcklwd m0, m2
    punpckhwd m1, m2

    mova    m2, m0
    punpcklwd m0, m1
    punpckhwd m2, m1

    mova    m1, m0
    punpcklwd m0, m2
    punpckhwd m1, m2
%endif
%else
    ; 32-bit input: even dwords (channel 0) -> m0, odd dwords -> m1.
    mova    m1, m0
    shufps  m0, m2, 10001000b
    shufps  m1, m2, 11011101b
%endif
%if %4 < %5
    ; Narrowing conversion: fetch two more registers so that, after the
    ; swap, m0/m1 hold channel 0 and m2/m3 hold channel 1.
    mov%3   m2, [2*mmsize + srcq + (2<<%5)*lenq]
    mova    m3, m2
    mov%3   m4, [3*mmsize + srcq + (2<<%5)*lenq]
    shufps  m2, m4, 10001000b
    shufps  m3, m4, 11011101b
    SWAP 1,2
%endif
    %6      m0,m1,m2,m3,m4,m5
    mov%3   [         dstq  + (1<<%4)*lenq], m0
%if %4 > %5
    ; Widening conversion: m0/m1 belong to channel 0, m2/m3 to channel 1.
    mov%3   [         dst2q + (1<<%4)*lenq], m2
    mov%3   [mmsize + dstq  + (1<<%4)*lenq], m1
    mov%3   [mmsize + dst2q + (1<<%4)*lenq], m3
    add     lenq, 2*mmsize/(1<<%4)
%else
    mov%3   [         dst2q + (1<<%4)*lenq], m1
    add     lenq, mmsize/(1<<%4)
%endif
    jl      .next
    REP_RET
%endmacro
162
;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits <from>_to_<to>_<a|u>: converts a single plane of samples from one
; format to another.
;
; Macro parameters:
;   %1 to            output sample format (only used in the symbol name)
;   %2 from          input sample format (only used in the symbol name)
;   %3 a/u           a = aligned loads/stores (mova), u = unaligned (movu)
;   %4 log2_outsize  log2 of the output sample size in bytes
;   %5 log2_insize   log2 of the input sample size in bytes
;   %6 conv          conversion macro, invoked once per iteration on m0..m5
;   %7 init          setup macro, invoked once before the loop on m0..m5
;
; Arguments of the emitted function:
;   dst  pointer to an array of plane pointers; only dst[0] is used
;   src  pointer to an array of plane pointers; only src[0] is used
;   len  number of samples
;
; The loop body always runs before len is tested and advances in whole
; register-sized chunks, so callers presumably pass len > 0 and a buffer
; padded to the chunk size -- TODO confirm against the C caller.
;
; The aligned variant branches to the label <name>_u_int<SUFFIX>, which is
; emitted by the unaligned variant; both must be instantiated under the same
; INIT_* so the label exists.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov     srcq, [srcq]                    ; src = src[0]
    mov     dstq, [dstq]                    ; dst = dst[0]
%ifidn %3, a
    ; Any pointer that is not mmsize-aligned: continue in the unaligned body.
    test    dstq, mmsize-1
    jne     %2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point both buffers at their end and count a negative index up to zero.
    lea     srcq, [srcq + (1<<%5)*lenq]
    lea     dstq, [dstq + (1<<%4)*lenq]
    neg     lenq
    %7      m0,m1,m2,m3,m4,m5
.next:
    mov%3   m0, [           srcq + (1<<%5)*lenq]
    mov%3   m1, [  mmsize + srcq + (1<<%5)*lenq]
%if %4 < %5
    ; Narrowing conversion consumes four input registers per iteration.
    mov%3   m2, [2*mmsize + srcq + (1<<%5)*lenq]
    mov%3   m3, [3*mmsize + srcq + (1<<%5)*lenq]
%endif
    %6      m0,m1,m2,m3,m4,m5
    mov%3   [           dstq + (1<<%4)*lenq], m0
    mov%3   [  mmsize + dstq + (1<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversion produces four output registers per iteration.
    mov%3   [2*mmsize + dstq + (1<<%4)*lenq], m2
    mov%3   [3*mmsize + dstq + (1<<%4)*lenq], m3
    add     lenq, 4*mmsize/(1<<%4)
%else
    add     lenq, 2*mmsize/(1<<%4)
%endif
    jl      .next
%if mmsize == 8
    emms                                    ; MMX registers were used
    RET
%else
    REP_RET
%endif
%endmacro
204
;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits pack_6ch_<from>_to_<to>_<a|u>: interleaves six planar channels of
; 4-byte samples into one packed stream, optionally converting the format.
;
; Macro parameters:
;   %1 to            output sample format (only used in the symbol name)
;   %2 from          input sample format (only used in the symbol name)
;   %3 a/u           a = aligned loads/stores, u = unaligned
;   %4 log2_outsize  not referenced by this macro
;   %5 log2_insize   not referenced by this macro
;   %6 conv          conversion macro; called as conv regA, regB, x, x, m7, m3
;                    (m7 = constant loaded by init, m3 = scratch)
;   %7 init          setup macro; called once as init x, x, x, x, m7, x
;
; Arguments of the emitted function:
;   dst  pointer to an array of plane pointers; only dst[0] is used
;   src  pointer to an array of plane pointers; src[0]..src[5] are used
;   len  third argument, number of samples per channel; each iteration
;        consumes mmsize/4 samples from every channel
;
; The loop body always runs before len is tested, so callers presumably pass
; len > 0 and a buffer padded to the chunk size -- TODO confirm.
;
; The aligned variant branches to the label <name>_u_int<SUFFIX>, which is
; emitted by the unaligned variant; both must be instantiated under the same
; INIT_* so the label exists.
;
; Eight vector registers are declared because m7 carries the conversion
; constant; x86inc only preserves the callee-saved XMM registers that are
; declared here.
;------------------------------------------------------------------------------
%macro PACK_6CH 5-7
cglobal pack_6ch_%2_to_%1_%3, 2,8,8, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov     lend, r2d                       ; r2 is reused for src1 below
%else
    %define lend dword r2m                  ; keep len in its stack slot
%endif
    mov     src1q, [srcq+1*gprsize]
    mov     src2q, [srcq+2*gprsize]
    mov     src3q, [srcq+3*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src5q, [srcq+5*gprsize]
    mov     srcq,  [srcq]
    mov     dstq,  [dstq]
%ifidn %3, a
    ; Every plane is read with an aligned load below, so every pointer
    ; (including src1) has to be checked before taking the aligned path.
    test    dstq,  mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq,  mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src4q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn src1..src5 into offsets relative to src so that a single pointer
    ; increment advances all six planes.
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
    sub     src4q, srcq
    sub     src5q, srcq
    ; m7 is never written inside the loop, so the constant is loaded once.
    %7      x,x,x,x,m7,x
.loop:
    mov%3   m0, [srcq      ]
    mov%3   m1, [srcq+src1q]
    mov%3   m2, [srcq+src2q]
    mov%3   m3, [srcq+src3q]
    mov%3   m4, [srcq+src4q]
    mov%3   m5, [srcq+src5q]
%if cpuflag(sse)
    ; 6x4 -> 4x6 transpose of 32-bit elements; m6 is the scratch register.
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

%if cpuflag(avx)
    blendps m6, m4, m0, 1100b
%else
    movaps  m6, m4
    shufps  m4, m0, q3210
    SWAP 4,6
%endif
    movlhps m0, m2
    movhlps m4, m2
%if cpuflag(avx)
    blendps m2, m5, m1, 1100b
%else
    movaps  m2, m5
    shufps  m5, m1, q3210
    SWAP 2,5
%endif
    movlhps m1, m3
    movhlps m5, m3

    ; Convert the six output registers pairwise (m3 is free as scratch now).
    %6      m0,m6,x,x,m7,m3
    %6      m4,m1,x,x,m7,m3
    %6      m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    ; Two samples per register: pair up channels, no format conversion.
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq    [dstq   ], m0
    movq    [dstq+ 8], m2
    movq    [dstq+16], m4
    movq    [dstq+24], m1
    movq    [dstq+32], m3
    movq    [dstq+40], m5
%endif
    add     srcq, mmsize
    add     dstq, mmsize*6
    sub     lend, mmsize/4
    jg      .loop
%if mmsize == 8
    emms                                    ; MMX registers were used
    RET
%else
    REP_RET
%endif
%endmacro
305
; Widen the 16-bit samples held in m0 and m1 to 32 bits.
; Each word is interleaved *above* a zero word, i.e. value << 16.
; In:  m0, m1 (packed int16)
; Out: m0, m1 = widened former m0 (low half, high half)
;      m2, m3 = widened former m1 (low half, high half)
; Register names m0 and m4 are exchanged by the SWAP; the macro arguments
; themselves are not referenced.
%macro INT16_TO_INT32_N 6
    ; second input register -> m2 / m3
    pxor        m2, m2
    punpcklwd   m2, m1
    pxor        m3, m3
    punpckhwd   m3, m1
    ; first input register (renamed to m4) -> m0 / m1
    SWAP 4,0
    pxor        m0, m0
    punpcklwd   m0, m4
    pxor        m1, m1
    punpckhwd   m1, m4
%endmacro
317
; Narrow the 32-bit samples held in m0..m3 to 16 bits by keeping the upper
; half of every dword (arithmetic shift right by 16, then signed pack).
; In:  m0, m1, m2, m3 (packed int32)
; Out: m0 = packed int16 from former m0:m1
;      m1 = packed int16 from former m2:m3 (via the final SWAP)
; The macro arguments themselves are not referenced.
%macro INT32_TO_INT16_N 6
    ; first pair
    psrad       m0, 16
    psrad       m1, 16
    packssdw    m0, m1
    ; second pair
    psrad       m2, 16
    psrad       m3, 16
    packssdw    m2, m3
    SWAP 1,2
%endmacro
327
; Setup for INT32_TO_FLOAT_N: load the 2^-31 scale factor into argument 5.
%macro INT32_TO_FLOAT_INIT 6
    mova        %5, [flt2pm31]
%endmacro

; Convert the packed int32 samples in %1 and %2 to float, in place.
;   %1, %2  registers to convert
;   %5      register holding the scale factor loaded by INT32_TO_FLOAT_INIT
;   %3, %4, %6 are not referenced
%macro INT32_TO_FLOAT_N 6
    ; integer -> float
    cvtdq2ps    %1, %1
    cvtdq2ps    %2, %2
    ; scale by 2^-31
    mulps       %1, %1, %5
    mulps       %2, %2, %5
%endmacro
337
; Setup for FLOAT_TO_INT32_N: load the 2^31 scale factor into argument 5.
%macro FLOAT_TO_INT32_INIT 6
    mova        %5, [flt2p31]
%endmacro

; Convert the packed float samples in %1 and %2 to int32, in place.
;   %1, %2  registers to convert
;   %5      register holding the scale factor loaded by FLOAT_TO_INT32_INIT
;   %6      scratch register
;   %3, %4 are not referenced
; cvtps2dq yields 0x80000000 for values it cannot represent. The compare
; (predicate 5 = not-less-than) produces an all-ones mask where the scaled
; value is >= 2^31; adding that mask (-1) turns 0x80000000 into 0x7fffffff,
; so positive overflow ends up at the largest int32 value.
%macro FLOAT_TO_INT32_N 6
    ; scale both inputs by 2^31
    mulps       %1, %5
    mulps       %2, %5
    ; first register: convert, build overflow mask, correct
    cvtps2dq    %6, %1
    cmpps       %1, %1, %5, 5
    paddd       %1, %6
    ; second register: same sequence
    cvtps2dq    %6, %2
    cmpps       %2, %2, %5, 5
    paddd       %2, %6
%endmacro
351
; Setup for INT16_TO_FLOAT_N: load the 2^-31 scale factor into m5.
; The register is fixed; the macro arguments are not referenced.
%macro INT16_TO_FLOAT_INIT 6
    mova        m5, [flt2pm31]
%endmacro

; Convert the packed int16 samples in m0 and m1 to float.
; The samples are first widened to int32 (value << 16) by INT16_TO_INT32_N,
; which is why the int32 scale factor 2^-31 is the right one here.
; In:  m0, m1 (packed int16), m5 (scale factor)
; Out: m0..m3 (packed float, in the register order of INT16_TO_INT32_N)
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps    m0, m0
    mulps       m0, m0, m5
    cvtdq2ps    m1, m1
    mulps       m1, m1, m5
    cvtdq2ps    m2, m2
    mulps       m2, m2, m5
    cvtdq2ps    m3, m3
    mulps       m3, m3, m5
%endmacro
366
; Setup for FLOAT_TO_INT16_N: load the 2^15 scale factor into m5.
; The register is fixed; the macro arguments are not referenced.
%macro FLOAT_TO_INT16_INIT 6
    mova        m5, [flt2p15]
%endmacro

; Convert the packed float samples in m0..m3 to int16.
; In:  m0, m1, m2, m3 (packed float), m5 (scale factor)
; Out: m0 = packed int16 from former m0:m1
;      m1 = packed int16 from former m2:m3
; packssdw saturates to the signed 16-bit range.
; The macro arguments are not referenced.
%macro FLOAT_TO_INT16_N 6
    ; first pair -> m0
    mulps       m0, m5
    mulps       m1, m5
    cvtps2dq    m0, m0
    cvtps2dq    m1, m1
    packssdw    m0, m1
    ; second pair -> m1 (m1 is free again after the pack above)
    mulps       m2, m5
    mulps       m3, m5
    cvtps2dq    m1, m2
    cvtps2dq    m3, m3
    packssdw    m1, m3
%endmacro
382
; Placeholder that accepts 0 to 6 arguments and expands to nothing.
; Passed as the conversion and/or setup macro when no work is needed there.
383%macro NOP_N 0-6
384%endmacro
385
; MMX instantiations. Argument order for every invocation below:
;   to, from, a/u, log2_outsize, log2_insize, conversion macro, setup macro.
; Each "a" variant branches to a label emitted by the matching "u" variant.
386INIT_MMX mmx
; Integer-only single-plane conversions.
387CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
388CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
389CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
390CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
391
; 6-channel interleave without format conversion.
392PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
393PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
394
f6fa7814
DM
; SSE instantiations: 6-channel interleave without format conversion.
395INIT_XMM sse
396PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
397PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
398
2ba45a60
DM
; SSE2 instantiations. Argument order for every invocation below:
;   to, from, a/u, log2_outsize, log2_insize, conversion macro, setup macro.
; Each "a" variant branches to a label emitted by the matching "u" variant.
399INIT_XMM sse2
; Single-plane integer conversions.
400CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
401CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
402CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
403CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
404
; Planar -> packed stereo, integer formats.
405PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
406PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
407PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
408PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
409PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
410PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
411PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
412PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
413
; Packed -> planar stereo, integer formats.
414UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
415UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
416UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
417UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
418UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
419UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
420UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
421UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
422
; Single-plane conversions involving float.
423CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
424CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
425CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
426CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
427CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
428CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
429CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
430CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
431
; Planar -> packed stereo involving float.
432PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
433PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
434PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
435PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
436PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
437PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
438PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
439PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
440
; Packed -> planar stereo involving float.
441UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
442UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
443UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
444UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
445UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
446UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
447UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
448UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
449
f6fa7814
DM
; 6-channel interleave with int32 <-> float conversion; these follow the
; preceding INIT_XMM sse2 and so are built for SSE2.
450PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
451PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
452PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
453PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
2ba45a60
DM
454
; SSSE3 instantiations: only the stereo unpackers with 16-bit input are
; rebuilt, since that is the only path where UNPACK_2CH uses pshufb.
455INIT_XMM ssse3
456UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
457UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
458UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
459UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
460UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
461UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
462
2ba45a60
DM
; AVX instantiations, only assembled when HAVE_AVX_EXTERNAL is non-zero.
463%if HAVE_AVX_EXTERNAL
; 128-bit registers: 6-channel interleave (PACK_6CH selects blendps instead
; of the movaps/shufps sequence when cpuflag(avx) is set).
464INIT_XMM avx
465PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
466PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
467
468PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
469PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
470PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
471PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
472
; 256-bit registers: single-plane int32 -> float.
473INIT_YMM avx
474CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
475CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
476%endif
f6fa7814
DM
477
; AVX2 instantiations, only assembled when HAVE_AVX2_EXTERNAL is non-zero.
; 256-bit registers: single-plane float -> int32 (FLOAT_TO_INT32_N uses the
; integer add paddd on full-width registers).
478%if HAVE_AVX2_EXTERNAL
479INIT_YMM avx2
480CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
481CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
482%endif