Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libswresample / x86 / audio_convert.asm
CommitLineData
2ba45a60
DM
;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Read-only constants, section requested with 32-byte alignment.
; Each float scale is stored 8 times (8 dwords = 32 bytes), so one table can
; be loaded into a full-width vector register with a single mova.
SECTION_RODATA 32
flt2pm31: times 8 dd 4.6566129e-10      ; 2^-31, scales an int32 to float
flt2p31 : times 8 dd 2147483648.0       ; 2^31,  scales a float to int32
flt2p15 : times 8 dd 32768.0            ; 2^15,  scales a float to int16

; pshufb control mask used by the SSSE3 path of UNPACK_2CH: gathers the
; even-indexed 16-bit words (bytes 0-1, 4-5, 8-9, 12-13) into the low 8 bytes
; and the odd-indexed words (bytes 2-3, 6-7, 10-11, 14-15) into the high 8 bytes.
word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text


;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits the function pack_2ch_<from>_to_<to>_<a|u>(dst, src, len): reads two
; separate source planes and writes them sample-interleaved into one
; destination plane, running the conv macro on the data on the way.
;
;   %1  to            destination sample type (only used in the symbol name)
;   %2  from          source sample type      (only used in the symbol name)
;   %3  a | u         aligned or unaligned loads/stores (selects mova / movu)
;   %4  log2_outsize  log2 of the destination sample size in bytes
;   %5  log2_insize   log2 of the source sample size in bytes
;   %6  conv          per-iteration conversion macro, called as
;                     conv m0,m1,m2,m3,m4,m5
;   %7  init          one-time setup macro, same argument list
; The macro is declared 5-7, but %6 and %7 are expanded unconditionally, so
; every use has to pass all seven arguments.
;
; Function arguments: dst and src each point at an array of plane pointers.
; dst[0] is the output; src[0] and src[1] are the two inputs. len is scaled by
; the sample sizes, i.e. it counts samples per channel.
;
; The "a" variant branches into the "u" variant (label ..._u_int) as soon as
; one of the three pointers is not a multiple of mmsize, so the matching "u"
; variant must be instantiated under the same INIT_* setting.
;
; There is no tail handling: the loop body runs before len is tested and
; advances by a fixed number of samples per iteration.
; NOTE(review): the full 64-bit lenq is used without extension, while PACK_6CH
; treats len as a 32-bit value - confirm the C prototype and extend if needed.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov     src2q, [srcq+gprsize]       ; second input plane
    mov      srcq, [srcq]               ; first input plane
    mov      dstq, [dstq]               ; output plane
%ifidn %3, a
    test     dstq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test     srcq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX
%endif
    ; Point every pointer at the end of its buffer and count lenq up from
    ; -len to 0, so a single register indexes all three buffers.
    lea      srcq, [srcq  + (1<<%5)*lenq]
    lea     src2q, [src2q + (1<<%5)*lenq]
    lea      dstq, [dstq  + (2<<%4)*lenq]
    neg      lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; Output at least as wide as input: interleave first, then convert.
    mov%3      m0, [         srcq +(1<<%5)*lenq]
    mova       m1, m0
    mov%3      m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd  m0, m2
    punpckhwd  m1, m2
%else
    punpckldq  m0, m2
    punpckhdq  m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; Output narrower than input: convert both planes, then interleave words.
    mov%3      m0, [         srcq +(1<<%5)*lenq]
    mov%3      m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3      m2, [         src2q+(1<<%5)*lenq]
    mov%3      m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova       m2, m0
    punpcklwd  m0, m1
    punpckhwd  m2, m1
    SWAP 1,2                            ; results are named m0, m1 for the stores
%endif
    mov%3 [           dstq+(2<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversion produced four registers of output.
    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
    add      lenq, 4*mmsize/(2<<%4)
%else
    add      lenq, 2*mmsize/(2<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits the function unpack_2ch_<from>_to_<to>_<a|u>(dst, src, len): reads one
; sample-interleaved two-channel source plane and writes the two channels to
; separate destination planes, running the conv macro on the data on the way.
;
;   %1  to            destination sample type (only used in the symbol name)
;   %2  from          source sample type      (only used in the symbol name)
;   %3  a | u         aligned or unaligned loads/stores (selects mova / movu)
;   %4  log2_outsize  log2 of the destination sample size in bytes
;   %5  log2_insize   log2 of the source sample size in bytes
;   %6  conv          per-iteration conversion macro (conv m0,m1,m2,m3,m4,m5)
;   %7  init          one-time setup macro, same argument list
; The macro is declared 5-7, but %6 and %7 are expanded unconditionally, so
; every use has to pass all seven arguments.
;
; Function arguments: dst and src each point at an array of plane pointers.
; src[0] is the input; dst[0] and dst[1] are the two outputs. len is scaled by
; the sample sizes, i.e. it counts samples per channel.
;
; The "a" variant branches into the "u" variant (label ..._u_int) as soon as
; one of the three pointers is not a multiple of mmsize, so the matching "u"
; variant must be instantiated under the same INIT_* setting.
;
; There is no tail handling: the loop body runs before len is tested and
; advances by a fixed number of samples per iteration.
; NOTE(review): the full 64-bit lenq is used without extension, while PACK_6CH
; treats len as a 32-bit value - confirm the C prototype and extend if needed.
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov     dst2q, [dstq+gprsize]       ; second output plane
    mov      srcq, [srcq]               ; interleaved input plane
    mov      dstq, [dstq]               ; first output plane
%ifidn %3, a
    test     dstq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test     srcq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%endif
    ; Point every pointer at the end of its buffer and count lenq up from
    ; -len to 0, so a single register indexes all three buffers.
    lea      srcq, [srcq  + (2<<%5)*lenq]
    lea      dstq, [dstq  + (1<<%4)*lenq]
    lea     dst2q, [dst2q + (1<<%4)*lenq]
    neg      lenq
    %7 m0,m1,m2,m3,m4,m5
    mova       m6, [word_unpack_shuf]   ; only read by the SSSE3 16-bit path below
.next:
    mov%3      m0, [           srcq +(2<<%5)*lenq]
    mov%3      m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
%ifidn SUFFIX, _ssse3
    ; 16-bit input, SSSE3: pshufb sorts each register into even words | odd
    ; words, then the qword unpacks collect one channel per register.
    pshufb     m0, m6
    mova       m1, m0
    pshufb     m2, m6
    punpcklqdq m0, m2
    punpckhqdq m1, m2
%else
    ; 16-bit input without pshufb: three rounds of word interleaving leave
    ; one channel in m0 and the other in m1.
    mova       m1, m0
    punpcklwd  m0, m2
    punpckhwd  m1, m2

    mova       m2, m0
    punpcklwd  m0, m1
    punpckhwd  m2, m1

    mova       m1, m0
    punpcklwd  m0, m2
    punpckhwd  m1, m2
%endif
%else
    ; 32-bit input: pick dwords 0,2 of each register for the first channel
    ; and dwords 1,3 for the second.
    mova       m1, m0
    shufps     m0, m2, 10001000b
    shufps     m1, m2, 11011101b
%endif
%if %4 < %5
    ; Narrowing conversion needs twice as many input samples per store.
    mov%3      m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova       m3, m2
    mov%3      m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps     m2, m4, 10001000b
    shufps     m3, m4, 11011101b
    SWAP 1,2                            ; m0,m1 = first channel, m2,m3 = second
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    ; Widening conversion: two registers per channel.
    mov%3 [          dst2q+(1<<%4)*lenq], m2
    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
    add      lenq, 2*mmsize/(1<<%4)
%else
    mov%3 [          dst2q+(1<<%4)*lenq], m1
    add      lenq, mmsize/(1<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits the function <from>_to_<to>_<a|u>(dst, src, len): converts one plane
; of samples from one format to another.
;
;   %1  to            destination sample type (only used in the symbol name)
;   %2  from          source sample type      (only used in the symbol name)
;   %3  a | u         aligned or unaligned loads/stores (selects mova / movu)
;   %4  log2_outsize  log2 of the destination sample size in bytes
;   %5  log2_insize   log2 of the source sample size in bytes
;   %6  conv          per-iteration conversion macro (conv m0,m1,m2,m3,m4,m5)
;   %7  init          one-time setup macro, same argument list
; The macro is declared 5-7, but %6 and %7 are expanded unconditionally, so
; every use has to pass all seven arguments.
;
; Function arguments: dst and src each point at an array of plane pointers,
; of which only entry 0 is used. len is scaled by the sample sizes, i.e. it
; counts samples.
;
; The "a" variant branches into the "u" variant (label ..._u_int) as soon as
; dst or src is not a multiple of mmsize, so the matching "u" variant must be
; instantiated under the same INIT_* setting.
;
; There is no tail handling: the loop body runs before len is tested and
; advances by a fixed number of samples per iteration.
; NOTE(review): the full 64-bit lenq is used without extension, while PACK_6CH
; treats len as a 32-bit value - confirm the C prototype and extend if needed.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov      srcq, [srcq]
    mov      dstq, [dstq]
%ifidn %3, a
    test     dstq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
    test     srcq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX
%endif
    ; Point both pointers at the end of their buffers and count lenq up from
    ; -len to 0, so a single register indexes both.
    lea      srcq, [srcq + (1<<%5)*lenq]
    lea      dstq, [dstq + (1<<%4)*lenq]
    neg      lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3      m0, [           srcq +(1<<%5)*lenq]
    mov%3      m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    ; Narrowing conversion: four input registers fill two output registers.
    mov%3      m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3      m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    ; Widening conversion: two input registers became four output registers.
    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
    add      lenq, 4*mmsize/(1<<%4)
%else
    add      lenq, 2*mmsize/(1<<%4)
%endif
        jl .next
%if mmsize == 8
    emms                                ; MMX build: clear MMX state before returning
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits the function pack_6ch_<from>_to_<to>_<a|u>(dst, src, len): reads six
; separate source planes and writes them sample-interleaved into one
; destination plane, running the conv macro on the data on the way.
;
;   %1  to     destination sample type (only used in the symbol name)
;   %2  from   source sample type      (only used in the symbol name)
;   %3  a | u  aligned or unaligned access (selects mova / movu for the loads
;              and movaps / movups for the SSE4 stores)
;   %4, %5     accepted for symmetry with the other kernel macros; not read
;   %6  conv   conversion macro, called as conv rA,rB,x,x,m7,m3 on register
;              pairs (m7 = constant set up by init, m3 = scratch)
;   %7  init   one-time setup macro, called as init x,x,x,x,m7,x
; The macro is declared 5-7, but %6 and %7 are expanded unconditionally, so
; every use has to pass all seven arguments.
;
; Function arguments: dst and src each point at an array of plane pointers.
; dst[0] is the output; src[0..5] are the six inputs. len is read as a 32-bit
; value. Each iteration consumes mmsize bytes from every input plane, writes
; 6*mmsize bytes, and decrements len by mmsize/4.
;
; The "a" variant branches into the "u" variant (label ..._u_int) as soon as
; any of the seven pointers is not a multiple of mmsize, so the matching "u"
; variant must be instantiated under the same INIT_* setting.
;
; Eight vector registers are declared because the conversion variants keep
; their scale constant in m7.
;------------------------------------------------------------------------------
%macro PACK_6CH 5-7
cglobal pack_6ch_%2_to_%1_%3, 2,8,8, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov     lend, r2d                   ; r2 is about to be reused as src1
%else
    %define lend dword r2m              ; leave len in its argument slot
%endif
    mov    src1q, [srcq+1*gprsize]
    mov    src2q, [srcq+2*gprsize]
    mov    src3q, [srcq+3*gprsize]
    mov    src4q, [srcq+4*gprsize]
    mov    src5q, [srcq+5*gprsize]
    mov     srcq, [srcq]
    mov     dstq, [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    ; src1 is loaded with an aligned move just like the other planes, so it
    ; has to pass the same check.
    test   src1q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test   src2q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test   src3q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test   src4q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test   src5q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX
%endif
    ; Turn src1..src5 into byte offsets from src, so that advancing srcq
    ; alone walks all six planes.
    sub    src1q, srcq
    sub    src2q, srcq
    sub    src3q, srcq
    sub    src4q, srcq
    sub    src5q, srcq
    ; Loop-invariant setup; nothing in the loop writes m7.
    %7 x,x,x,x,m7,x
.loop:
    mov%3     m0, [srcq      ]
    mov%3     m1, [srcq+src1q]
    mov%3     m2, [srcq+src2q]
    mov%3     m3, [srcq+src3q]
    mov%3     m4, [srcq+src4q]
    mov%3     m5, [srcq+src5q]
%if cpuflag(sse4)
    ; Interleave the planes pairwise (SBUTTERFLYPS is provided by
    ; x86util.asm; m6 is its scratch register) ...
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

    ; ... then recombine the halves into six registers in output order:
    ; m0, m6, m4, m1, m2, m5.
    blendps   m6, m4, m0, 1100b
    movlhps   m0, m2
    movhlps   m4, m2
    blendps   m2, m5, m1, 1100b
    movlhps   m1, m3
    movhlps   m5, m3

    ; m3 is free from here on and serves as the conversion scratch register.
    %6 m0,m6,x,x,m7,m3
    %6 m4,m1,x,x,m7,m3
    %6 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq   [dstq   ], m0
    movq   [dstq+ 8], m2
    movq   [dstq+16], m4
    movq   [dstq+24], m1
    movq   [dstq+32], m3
    movq   [dstq+40], m5
%endif
    add     srcq, mmsize
    add     dstq, mmsize*6
    sub     lend, mmsize/4
        jg .loop
%if mmsize == 8
    emms                                ; MMX build: clear MMX state before returning
    RET
%else
    REP_RET
%endif
%endmacro

; INT16_TO_INT32_N: widen the 16-bit samples in m0 and m1 to 32 bits.
; The six macro arguments are ignored; registers are fixed.
;   in : m0, m1   packed 16-bit samples
;   out: m0, m1   low / high half of the old m0
;        m2, m3   low / high half of the old m1
; Each sample is interleaved below a zero word, so it ends up in the upper
; 16 bits of its dword (value << 16). m4 is clobbered.
%macro INT16_TO_INT32_N 6
    pxor      m2, m2
    pxor      m3, m3
    punpcklwd m2, m1
    punpckhwd m3, m1
    SWAP 4,0                            ; keep the old m0 reachable as m4
    pxor      m0, m0
    pxor      m1, m1
    punpcklwd m0, m4
    punpckhwd m1, m4
%endmacro

; INT32_TO_INT16_N: narrow the 32-bit samples in m0..m3 to 16 bits.
; The six macro arguments are ignored; registers are fixed.
;   in : m0, m1, m2, m3   packed 32-bit samples
;   out: m0 = samples of the old m0:m1, m1 = samples of the old m2:m3
; Each dword is shifted right arithmetically by 16 and the pairs are packed
; with signed saturation.
%macro INT32_TO_INT16_N 6
    psrad     m0, 16
    psrad     m1, 16
    psrad     m2, 16
    psrad     m3, 16
    packssdw  m0, m1
    packssdw  m2, m3
    SWAP 1,2                            ; second result is named m1 for the caller
%endmacro

; INT32_TO_FLOAT_INIT: load the 2^-31 scale factor into the register passed
; as argument 5, where INT32_TO_FLOAT_N expects it.
%macro INT32_TO_FLOAT_INIT 6
    mova      %5, [flt2pm31]
%endmacro
; INT32_TO_FLOAT_N: convert the packed int32 samples in %1 and %2 to float
; in place and multiply them by the scale held in %5 (set up by
; INT32_TO_FLOAT_INIT). Arguments 3, 4 and 6 are unused.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps  %1, %1
    cvtdq2ps  %2, %2
    mulps     %1, %1, %5
    mulps     %2, %2, %5
%endmacro

; FLOAT_TO_INT32_INIT: load the 2^31 scale factor into the register passed
; as argument 5, where FLOAT_TO_INT32_N expects it.
%macro FLOAT_TO_INT32_INIT 6
    mova      %5, [flt2p31]
%endmacro
; FLOAT_TO_INT32_N: convert the packed floats in %1 and %2 to int32 in place.
;   %5  register holding 2^31 (set up by FLOAT_TO_INT32_INIT)
;   %6  scratch register
; Arguments 3 and 4 are unused.
; After scaling, cvtps2dq yields 0x80000000 for lanes that do not fit in an
; int32. cmpnltps produces an all-ones mask (-1 as an integer) for every lane
; whose scaled value is not below 2^31; adding that mask to the converted
; value turns 0x80000000 into 0x7FFFFFFF, i.e. clamps positive overflow.
%macro FLOAT_TO_INT32_N 6
    mulps     %1, %5
    mulps     %2, %5
    cvtps2dq  %6, %1
    cmpnltps  %1, %5
    paddd     %1, %6
    cvtps2dq  %6, %2
    cmpnltps  %2, %5
    paddd     %2, %6
%endmacro

; INT16_TO_FLOAT_INIT: load the 2^-31 scale factor into m5 (fixed register;
; the macro arguments are ignored). INT16_TO_FLOAT_N reads it from there.
%macro INT16_TO_FLOAT_INIT 6
    mova      m5, [flt2pm31]
%endmacro
; INT16_TO_FLOAT_N: convert the 16-bit samples in m0 and m1 to floats in
; m0..m3 (same register layout as INT16_TO_INT32_N, which does the widening).
; The widening step leaves every sample shifted left by 16 bits, which is why
; the scale in m5 is 2^-31 rather than 2^-15. Works on fixed registers; the
; arguments are only forwarded. m4 is clobbered by the widening step.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps  m0, m0
    cvtdq2ps  m1, m1
    cvtdq2ps  m2, m2
    cvtdq2ps  m3, m3
    mulps     m0, m0, m5
    mulps     m1, m1, m5
    mulps     m2, m2, m5
    mulps     m3, m3, m5
%endmacro

; FLOAT_TO_INT16_INIT: load the 2^15 scale factor into m5 (fixed register;
; the macro arguments are ignored). FLOAT_TO_INT16_N reads it from there.
%macro FLOAT_TO_INT16_INIT 6
    mova      m5, [flt2p15]
%endmacro
; FLOAT_TO_INT16_N: convert the floats in m0..m3 to 16-bit samples.
; The six macro arguments are ignored; registers are fixed.
;   in : m0, m1, m2, m3   packed floats; m5 = 2^15 (FLOAT_TO_INT16_INIT)
;   out: m0 = samples of the old m0:m1, m1 = samples of the old m2:m3
; Values are scaled, converted to int32 and packed with signed saturation.
%macro FLOAT_TO_INT16_N 6
    mulps     m0, m5
    mulps     m1, m5
    mulps     m2, m5
    mulps     m3, m5
    cvtps2dq  m0, m0
    cvtps2dq  m1, m1
    packssdw  m0, m1
    cvtps2dq  m1, m2                    ; m1 is free again after the pack above
    cvtps2dq  m3, m3
    packssdw  m1, m3
%endmacro

; NOP_N: empty stand-in for the conv / init slots of the kernel macros when
; no conversion or setup is needed. Accepts and ignores up to six arguments.
%macro NOP_N 0-6
%endmacro

; Kernel instantiations.
; Argument order for every line: to, from, a/u, log2_outsize, log2_insize,
; conv macro, init macro. Each "a" variant branches into the "u" variant of
; the same kernel, so the two always appear as a pair under one INIT_* line.

; ---- MMX ----
INIT_MMX mmx
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

; ---- SSE2: integer-only kernels ----
INIT_XMM sse2
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

; ---- SSE2: kernels involving float ----
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT


; ---- SSSE3: 16-bit-input unpack kernels (these take the pshufb path) ----
INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

; ---- SSE4: six-channel pack ----
INIT_XMM sse4
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

; ---- AVX: only built when the assembler supports it ----
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_YMM avx
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
%endif