543f7014eeacc4e13125dbb672554fe5eccde77a
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
19 #ifndef AVUTIL_FLOAT_DSP_H
20 #define AVUTIL_FLOAT_DSP_H
24 typedef struct AVFloatDSPContext
{
26 * Calculate the product of two vectors of floats and store the result in
29 * @param dst output vector
30 * constraints: 32-byte aligned
31 * @param src0 first input vector
32 * constraints: 32-byte aligned
33 * @param src1 second input vector
34 * constraints: 32-byte aligned
35 * @param len number of elements in the input
36 * constraints: multiple of 16
38 void (*vector_fmul
)(float *dst
, const float *src0
, const float *src1
,
42 * Multiply a vector of floats by a scalar float and add to
43 * destination vector. Source and destination vectors must
44 * overlap exactly or not at all.
46 * @param dst result vector
47 * constraints: 32-byte aligned
48 * @param src input vector
49 * constraints: 32-byte aligned
50 * @param mul scalar value
51 * @param len length of vector
52 * constraints: multiple of 16
54 void (*vector_fmac_scalar
)(float *dst
, const float *src
, float mul
,
58 * Multiply a vector of floats by a scalar float. Source and
59 * destination vectors must overlap exactly or not at all.
61 * @param dst result vector
62 * constraints: 16-byte aligned
63 * @param src input vector
64 * constraints: 16-byte aligned
65 * @param mul scalar value
66 * @param len length of vector
67 * constraints: multiple of 4
69 void (*vector_fmul_scalar
)(float *dst
, const float *src
, float mul
,
73 * Multiply a vector of double by a scalar double. Source and
74 * destination vectors must overlap exactly or not at all.
76 * @param dst result vector
77 * constraints: 32-byte aligned
78 * @param src input vector
79 * constraints: 32-byte aligned
80 * @param mul scalar value
81 * @param len length of vector
82 * constraints: multiple of 8
84 void (*vector_dmul_scalar
)(double *dst
, const double *src
, double mul
,
88 * Overlap/add with window function.
89 * Used primarily by MDCT-based audio codecs.
90 * Source and destination vectors must overlap exactly or not at all.
92 * @param dst result vector
93 * constraints: 16-byte aligned
94 * @param src0 first source vector
95 * constraints: 16-byte aligned
96 * @param src1 second source vector
97 * constraints: 16-byte aligned
98 * @param win half-window vector
99 * constraints: 16-byte aligned
100 * @param len length of vector
101 * constraints: multiple of 4
103 void (*vector_fmul_window
)(float *dst
, const float *src0
,
104 const float *src1
, const float *win
, int len
);
107 * Calculate the product of two vectors of floats, add a third vector of
108 * floats and store the result in a vector of floats.
110 * @param dst output vector
111 * constraints: 32-byte aligned
112 * @param src0 first input vector
113 * constraints: 32-byte aligned
114 * @param src1 second input vector
115 * constraints: 32-byte aligned
116 * @param src2 third input vector
117 * constraints: 32-byte aligned
118 * @param len number of elements in the input
119 * constraints: multiple of 16
121 void (*vector_fmul_add
)(float *dst
, const float *src0
, const float *src1
,
122 const float *src2
, int len
);
125 * Calculate the product of two vectors of floats, and store the result
126 * in a vector of floats. The second vector of floats is iterated over
129 * @param dst output vector
130 * constraints: 32-byte aligned
131 * @param src0 first input vector
132 * constraints: 32-byte aligned
133 * @param src1 second input vector
134 * constraints: 32-byte aligned
135 * @param len number of elements in the input
136 * constraints: multiple of 16
138 void (*vector_fmul_reverse
)(float *dst
, const float *src0
,
139 const float *src1
, int len
);
142 * Calculate the sum and difference of two vectors of floats.
144 * @param v1 first input vector, sum output, 16-byte aligned
145 * @param v2 second input vector, difference output, 16-byte aligned
146 * @param len length of vectors, multiple of 4
148 void (*butterflies_float
)(float *av_restrict v1
, float *av_restrict v2
, int len
);
151 * Calculate the scalar product of two vectors of floats.
153 * @param v1 first vector, 16-byte aligned
154 * @param v2 second vector, 16-byte aligned
155 * @param len length of vectors, multiple of 4
157 * @return sum of elementwise products
159 float (*scalarproduct_float
)(const float *v1
, const float *v2
, int len
);
/**
 * Return the scalar product of two vectors.
 *
 * @param v1  first input vector
 * @param v2  second input vector
 * @param len number of elements
 *
 * @return sum of elementwise products
 */
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
174 * Initialize a float DSP context.
176 * @param fdsp float DSP context
177 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant
179 void avpriv_float_dsp_init(AVFloatDSPContext
*fdsp
, int strict
);
182 void ff_float_dsp_init_aarch64(AVFloatDSPContext
*fdsp
);
183 void ff_float_dsp_init_arm(AVFloatDSPContext
*fdsp
);
184 void ff_float_dsp_init_ppc(AVFloatDSPContext
*fdsp
, int strict
);
185 void ff_float_dsp_init_x86(AVFloatDSPContext
*fdsp
);
186 void ff_float_dsp_init_mips(AVFloatDSPContext
*fdsp
);
189 * Allocate a float DSP context.
191 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant
193 AVFloatDSPContext
*avpriv_float_dsp_alloc(int strict
);
195 #endif /* AVUTIL_FLOAT_DSP_H */