Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / x86 / audiodsp.asm
CommitLineData
2ba45a60
DM
;******************************************************************************
;* optimized audio functions
;* Copyright (c) 2008 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
21
22%include "libavutil/x86/x86util.asm"
23
24SECTION_TEXT
25
%macro SCALARPRODUCT 0
;-----------------------------------------------------------------------------
; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
;
; Returns the sum of v1[i] * v2[i] for i in [0, order). Products are
; accumulated in 32-bit lanes with paddd, so the sum wraps on overflow.
;
; Every iteration consumes 2*mmsize bytes (mmsize int16 elements) of each
; input and the loop body runs before the first test, so order has to be a
; positive multiple of mmsize. v1 is read with movu (no alignment needed);
; v2 is used directly as a pmaddwd memory operand, which in the XMM build
; means it must be 16-byte aligned.
;-----------------------------------------------------------------------------
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    ; order is a 32-bit int: on x86-64 the upper half of its register is
    ; undefined on entry. Doubling through the 32-bit view turns the element
    ; count into a byte count AND zero-extends it, so orderq is safe to use
    ; in the 64-bit address arithmetic below.
    add     orderd, orderd
    add     v1q, orderq                     ; v1 -> one past the last element
    add     v2q, orderq                     ; v2 -> one past the last element
    neg     orderq                          ; negative byte offset, runs up to 0
    pxor    m2, m2                          ; m2 = per-lane accumulators
.loop:
    movu    m0, [v1q + orderq]
    movu    m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]              ; pairwise 16x16 -> 32-bit multiply-add
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd   m2, m0
    paddd   m2, m1
    add     orderq, mmsize*2
    jl      .loop                           ; loop while the offset is still negative
    HADDD   m2, m0                          ; fold the dword lanes of m2 (m0 = scratch)
    movd    eax, m2                         ; return value
%if mmsize == 8
    emms                                    ; MMX build: leave MMX state before returning
%endif
    RET
%endmacro

; One instantiation per instruction set.
INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
55
56
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
;                           int32_t max, unsigned int len)
;
; Clamps every element of src to [min, max] and stores the result in dst.
; The loop body runs before len is tested and all accesses use mova, so len
; must be a positive multiple of the per-iteration element count and, for the
; XMM versions, src and dst must be 16-byte aligned.
;-----------------------------------------------------------------------------

; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = each inline loop processes 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; One pass of .loop therefore covers %2 * 4*(%3+1) vectors, i.e.
; mmsize*4*%2*(%3+1) bytes or mmsize*%2*(%3+1) int32 elements.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss  m4, minm                      ; float CLIPD: convert the bounds once
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4                            ; m4 = min in every lane
    SPLATD    m5                            ; m5 = max in every lane
.loop:
%assign %%i 0
%rep %2
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %3
    ; m7-m10 only exist on x86-64; the instantiation below guards on m8.
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    ; CLIPD value, min, max, 4th operand (m6 is reserved for it here)
    CLIPD     m0,  m4, m5, m6
    CLIPD     m1,  m4, m5, m6
    CLIPD     m2,  m4, m5, m6
    CLIPD     m3,  m4, m5, m6
%if %3
    CLIPD     m7,  m4, m5, m6
    CLIPD     m8,  m4, m5, m6
    CLIPD     m9,  m4, m5, m6
    CLIPD     m10, m4, m5, m6
%endif
    mova      [dstq+mmsize*(0+%%i)], m0
    mova      [dstq+mmsize*(1+%%i)], m1
    mova      [dstq+mmsize*(2+%%i)], m2
    mova      [dstq+mmsize*(3+%%i)], m3
%if %3
    mova      [dstq+mmsize*(4+%%i)], m7
    mova      [dstq+mmsize*(5+%%i)], m8
    mova      [dstq+mmsize*(6+%%i)], m9
    mova      [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(%3+1)
%endrep
    ; Step by exactly what the unrolled body above touched. The former
    ; (%2+%3) factor only matched this when %3 == 0 or %2 == 1.
    add       srcq, mmsize*4*%2*(%3+1)
    add       dstq, mmsize*4*%2*(%3+1)
    sub       lend, mmsize*%2*(%3+1)
    jg        .loop
    REP_RET
%endmacro

INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
; CLIPD is still CLIPD_MMX here: integer clip on XMM registers, "_int" suffix.
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2
; Float-based clip (%4=1), two inline loops per asm loop.
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
; m8 and up are available: use the 8-vector inline loop.
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
134
;-----------------------------------------------------------------------------
; void ff_vector_clipf(float *dst, const float *src,
;                      float min, float max, int len)
;
; dst[i] = min(max(src[i], min), max) for i in [0, len).
; Each iteration handles 4*mmsize bytes with aligned moves and the loop body
; runs before the first test, so len must be a positive multiple of
; 4*mmsize/4 floats and src/dst must be mmsize-aligned.
;-----------------------------------------------------------------------------
INIT_XMM sse
%if UNIX64
; min/max already sit in xmm0/xmm1; only the three integer args are loaded.
cglobal vector_clipf, 3,3,6, dst, src, len
%else
cglobal vector_clipf, 5,5,6, dst, src, min, max, len
%endif
%if WIN64
    ; Float args are slot-positional: min is in xmm2, max in xmm3.
    ; Rename so that m0/m1 refer to them.
    SWAP    0, 2
    SWAP    1, 3
%elif ARCH_X86_32
    ; Fetch the bounds through their memory-operand names.
    movss   m0, minm
    movss   m1, maxm
%endif
    SPLATD  m0                              ; m0 = lower bound in every lane
    SPLATD  m1                              ; m1 = upper bound in every lane

    shl     lend, 2                         ; floats -> bytes (32-bit op zero-extends)
    add     dstq, lenq                      ; both pointers -> end of buffer
    add     srcq, lenq
    neg     lenq                            ; negative byte offset, runs up to 0

.loop:
    ; load four vectors
    mova    m2, [srcq+lenq+mmsize*0]
    mova    m3, [srcq+lenq+mmsize*1]
    mova    m4, [srcq+lenq+mmsize*2]
    mova    m5, [srcq+lenq+mmsize*3]

    ; clamp each one: raise to the lower bound, then cap at the upper bound
    maxps   m2, m0
    minps   m2, m1
    maxps   m3, m0
    minps   m3, m1
    maxps   m4, m0
    minps   m4, m1
    maxps   m5, m0
    minps   m5, m1

    ; store four vectors
    mova    [dstq+lenq+mmsize*0], m2
    mova    [dstq+lenq+mmsize*1], m3
    mova    [dstq+lenq+mmsize*2], m4
    mova    [dstq+lenq+mmsize*3], m5

    add     lenq, mmsize*4
    jl      .loop
    REP_RET