Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * This file is part of FFmpeg. | |
3 | * | |
4 | * FFmpeg is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU Lesser General Public | |
6 | * License as published by the Free Software Foundation; either | |
7 | * version 2.1 of the License, or (at your option) any later version. | |
8 | * | |
9 | * FFmpeg is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * Lesser General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU Lesser General Public | |
15 | * License along with FFmpeg; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | */ | |
18 | ||
19 | #ifndef AVUTIL_FLOAT_DSP_H | |
20 | #define AVUTIL_FLOAT_DSP_H | |
21 | ||
22 | #include "config.h" | |
23 | ||
24 | typedef struct AVFloatDSPContext { | |
25 | /** | |
26 | * Calculate the product of two vectors of floats and store the result in | |
27 | * a vector of floats. | |
28 | * | |
29 | * @param dst output vector | |
30 | * constraints: 32-byte aligned | |
31 | * @param src0 first input vector | |
32 | * constraints: 32-byte aligned | |
33 | * @param src1 second input vector | |
34 | * constraints: 32-byte aligned | |
35 | * @param len number of elements in the input | |
36 | * constraints: multiple of 16 | |
37 | */ | |
38 | void (*vector_fmul)(float *dst, const float *src0, const float *src1, | |
39 | int len); | |
40 | ||
41 | /** | |
42 | * Multiply a vector of floats by a scalar float and add to | |
43 | * destination vector. Source and destination vectors must | |
44 | * overlap exactly or not at all. | |
45 | * | |
46 | * @param dst result vector | |
47 | * constraints: 32-byte aligned | |
48 | * @param src input vector | |
49 | * constraints: 32-byte aligned | |
50 | * @param mul scalar value | |
51 | * @param len length of vector | |
52 | * constraints: multiple of 16 | |
53 | */ | |
54 | void (*vector_fmac_scalar)(float *dst, const float *src, float mul, | |
55 | int len); | |
56 | ||
57 | /** | |
58 | * Multiply a vector of floats by a scalar float. Source and | |
59 | * destination vectors must overlap exactly or not at all. | |
60 | * | |
61 | * @param dst result vector | |
62 | * constraints: 16-byte aligned | |
63 | * @param src input vector | |
64 | * constraints: 16-byte aligned | |
65 | * @param mul scalar value | |
66 | * @param len length of vector | |
67 | * constraints: multiple of 4 | |
68 | */ | |
69 | void (*vector_fmul_scalar)(float *dst, const float *src, float mul, | |
70 | int len); | |
71 | ||
72 | /** | |
73 | * Multiply a vector of doubles by a scalar double. Source and | |
74 | * destination vectors must overlap exactly or not at all. | |
75 | * | |
76 | * @param dst result vector | |
77 | * constraints: 32-byte aligned | |
78 | * @param src input vector | |
79 | * constraints: 32-byte aligned | |
80 | * @param mul scalar value | |
81 | * @param len length of vector | |
82 | * constraints: multiple of 8 | |
83 | */ | |
84 | void (*vector_dmul_scalar)(double *dst, const double *src, double mul, | |
85 | int len); | |
86 | ||
87 | /** | |
88 | * Overlap/add with window function. | |
89 | * Used primarily by MDCT-based audio codecs. | |
90 | * Source and destination vectors must overlap exactly or not at all. | |
91 | * | |
92 | * @param dst result vector | |
93 | * constraints: 16-byte aligned | |
94 | * @param src0 first source vector | |
95 | * constraints: 16-byte aligned | |
96 | * @param src1 second source vector | |
97 | * constraints: 16-byte aligned | |
98 | * @param win half-window vector | |
99 | * constraints: 16-byte aligned | |
100 | * @param len length of vector | |
101 | * constraints: multiple of 4 | |
102 | */ | |
103 | void (*vector_fmul_window)(float *dst, const float *src0, | |
104 | const float *src1, const float *win, int len); | |
105 | ||
106 | /** | |
107 | * Calculate the product of two vectors of floats, add a third vector of | |
108 | * floats and store the result in a vector of floats. | |
109 | * | |
110 | * @param dst output vector | |
111 | * constraints: 32-byte aligned | |
112 | * @param src0 first input vector | |
113 | * constraints: 32-byte aligned | |
114 | * @param src1 second input vector | |
115 | * constraints: 32-byte aligned | |
116 | * @param src2 third input vector | |
117 | * constraints: 32-byte aligned | |
118 | * @param len number of elements in the input | |
119 | * constraints: multiple of 16 | |
120 | */ | |
121 | void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, | |
122 | const float *src2, int len); | |
123 | ||
124 | /** | |
125 | * Calculate the product of two vectors of floats, and store the result | |
126 | * in a vector of floats. The second vector of floats is iterated over | |
127 | * in reverse order. | |
128 | * | |
129 | * @param dst output vector | |
130 | * constraints: 32-byte aligned | |
131 | * @param src0 first input vector | |
132 | * constraints: 32-byte aligned | |
133 | * @param src1 second input vector | |
134 | * constraints: 32-byte aligned | |
135 | * @param len number of elements in the input | |
136 | * constraints: multiple of 16 | |
137 | */ | |
138 | void (*vector_fmul_reverse)(float *dst, const float *src0, | |
139 | const float *src1, int len); | |
140 | ||
141 | /** | |
142 | * Calculate the sum and difference of two vectors of floats, in place. | |
143 | * | |
144 | * @param v1 first input vector, sum output, 16-byte aligned | |
145 | * @param v2 second input vector, difference output, 16-byte aligned | |
146 | * @param len length of vectors, multiple of 4 | |
147 | */ | |
148 | void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len); | |
149 | ||
150 | /** | |
151 | * Calculate the scalar product of two vectors of floats. | |
152 | * | |
153 | * @param v1 first vector, 16-byte aligned | |
154 | * @param v2 second vector, 16-byte aligned | |
155 | * @param len length of vectors, multiple of 4 | |
156 | * | |
157 | * @return sum of elementwise products | |
158 | */ | |
159 | float (*scalarproduct_float)(const float *v1, const float *v2, int len); | |
160 | } AVFloatDSPContext; | |
161 | ||
162 | /** | |
163 | * Return the scalar product of two vectors. | |
164 | * | |
165 | * @param v1 first input vector | |
166 | * @param v2 second input vector | |
167 | * @param len number of elements | |
168 | * | |
169 | * @return sum of elementwise products | |
170 | */ | |
171 | float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); | |
172 | ||
173 | /** | |
174 | * Initialize a float DSP context. | |
175 | * | |
176 | * @param fdsp float DSP context to initialize | |
177 | * @param strict if non-zero, avoid using functions which may not be IEEE-754 compliant | |
178 | */ | |
179 | void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict); | |
180 | ||
181 | ||
182 | void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp); /* per-architecture initializers; presumably invoked from avpriv_float_dsp_init() -- TODO confirm against the implementation file */ | |
183 | void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); | |
184 | void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); /* unlike the other per-architecture initializers, this one also takes the strict flag */ | |
185 | void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); | |
186 | void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); | |
187 | ||
188 | #endif /* AVUTIL_FLOAT_DSP_H */ |