Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright 2005 Balatoni Denes | |
3 | * Copyright 2006 Loren Merritt | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "attributes.h" | |
24 | #include "float_dsp.h" | |
f6fa7814 | 25 | #include "mem.h" |
2ba45a60 DM |
26 | |
/**
 * Element-wise product of two float vectors (plain C version).
 *
 * Stores src0[k] * src1[k] into dst[k] for every k in [0, len), in
 * ascending index order. Nothing is written when len <= 0.
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                          int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
34 | ||
/**
 * Multiply-accumulate a float vector scaled by a constant (plain C version).
 *
 * Adds src[k] * mul onto dst[k] for every k in [0, len), in ascending
 * index order. dst is both read and written.
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] += src[k] * mul;
        k++;
    }
}
42 | ||
/**
 * Scale a float vector by a constant (plain C version).
 *
 * Stores src[k] * mul into dst[k] for every k in [0, len), in ascending
 * index order.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
50 | ||
/**
 * Scale a double vector by a constant (plain C version).
 *
 * Stores src[k] * mul into dst[k] for every k in [0, len), in ascending
 * index order.
 */
static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
58 | ||
/**
 * Windowed combination of two float vectors (plain C version).
 *
 * Reads len elements from src0 (front to back), len elements from src1
 * (back to front) and 2 * len window coefficients from win, and writes
 * 2 * len results to dst. For each k in [0, len), with m = 2 * len - 1 - k:
 *
 *   dst[k] = src0[k] * win[m] - src1[len - 1 - k] * win[k]
 *   dst[m] = src0[k] * win[k] + src1[len - 1 - k] * win[m]
 *
 * All four inputs of a step are loaded before its two outputs are stored.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    /* Upper halves of the output and of the window, walked backwards. */
    float       *dst_hi = dst + len;
    const float *win_hi = win + len;
    int k;

    for (k = 0; k < len; k++) {
        const int back = len - 1 - k;
        float s0 = src0[k];
        float s1 = src1[back];
        float wi = win[k];
        float wj = win_hi[back];

        dst[k]       = s0 * wj - s1 * wi;
        dst_hi[back] = s0 * wi + s1 * wj;
    }
}
77 | ||
/**
 * Element-wise multiply then add (plain C version).
 *
 * Stores src0[k] * src1[k] + src2[k] into dst[k] for every k in [0, len),
 * in ascending index order.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
85 | ||
/**
 * Element-wise product of one vector with another read in reverse
 * (plain C version).
 *
 * Stores src0[k] * src1[len - 1 - k] into dst[k] for every k in [0, len),
 * in ascending index order.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0,
                                  const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int mirrored = len - 1 - k;

        dst[k] = src0[k] * src1[mirrored];
    }
}
95 | ||
96 | static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2, | |
97 | int len) | |
98 | { | |
99 | int i; | |
100 | ||
101 | for (i = 0; i < len; i++) { | |
102 | float t = v1[i] - v2[i]; | |
103 | v1[i] += v2[i]; | |
104 | v2[i] = t; | |
105 | } | |
106 | } | |
107 | ||
/**
 * Dot product of two float vectors (plain C version).
 *
 * Accumulates v1[k] * v2[k] in a single-precision sum, in ascending index
 * order, starting from zero.
 *
 * @return the accumulated sum; 0 when len <= 0
 */
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int k   = 0;

    while (k < len) {
        p += v1[k] * v2[k];
        k++;
    }

    return p;
}
118 | ||
119 | av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | |
120 | { | |
121 | fdsp->vector_fmul = vector_fmul_c; | |
122 | fdsp->vector_fmac_scalar = vector_fmac_scalar_c; | |
123 | fdsp->vector_fmul_scalar = vector_fmul_scalar_c; | |
124 | fdsp->vector_dmul_scalar = vector_dmul_scalar_c; | |
125 | fdsp->vector_fmul_window = vector_fmul_window_c; | |
126 | fdsp->vector_fmul_add = vector_fmul_add_c; | |
127 | fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | |
128 | fdsp->butterflies_float = butterflies_float_c; | |
129 | fdsp->scalarproduct_float = avpriv_scalarproduct_float_c; | |
130 | ||
131 | if (ARCH_AARCH64) | |
132 | ff_float_dsp_init_aarch64(fdsp); | |
133 | if (ARCH_ARM) | |
134 | ff_float_dsp_init_arm(fdsp); | |
135 | if (ARCH_PPC) | |
136 | ff_float_dsp_init_ppc(fdsp, bit_exact); | |
137 | if (ARCH_X86) | |
138 | ff_float_dsp_init_x86(fdsp); | |
139 | if (ARCH_MIPS) | |
140 | ff_float_dsp_init_mips(fdsp); | |
141 | } | |
142 | ||
f6fa7814 DM |
143 | av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact) |
144 | { | |
145 | AVFloatDSPContext *ret = av_mallocz(sizeof(AVFloatDSPContext)); | |
146 | if (ret) | |
147 | avpriv_float_dsp_init(ret, bit_exact); | |
148 | return ret; | |
149 | } | |
150 | ||
151 | ||
2ba45a60 DM |
152 | #ifdef TEST |
153 | ||
154 | #include <float.h> | |
155 | #include <math.h> | |
156 | #include <stdint.h> | |
157 | #include <stdlib.h> | |
158 | #include <string.h> | |
f6fa7814 DM |
159 | #if HAVE_UNISTD_H |
160 | #include <unistd.h> /* for getopt */ | |
161 | #endif | |
162 | #if !HAVE_GETOPT | |
163 | #include "compat/getopt.c" | |
164 | #endif | |
2ba45a60 DM |
165 | |
166 | #include "common.h" | |
167 | #include "cpu.h" | |
168 | #include "internal.h" | |
169 | #include "lfg.h" | |
170 | #include "log.h" | |
2ba45a60 DM |
171 | #include "random_seed.h" |
172 | ||
173 | #define LEN 240 | |
174 | ||
175 | static void fill_float_array(AVLFG *lfg, float *a, int len) | |
176 | { | |
177 | int i; | |
178 | double bmg[2], stddev = 10.0, mean = 0.0; | |
179 | ||
180 | for (i = 0; i < len; i += 2) { | |
181 | av_bmg_get(lfg, bmg); | |
182 | a[i] = bmg[0] * stddev + mean; | |
183 | a[i + 1] = bmg[1] * stddev + mean; | |
184 | } | |
185 | } | |
186 | static int compare_floats(const float *a, const float *b, int len, | |
187 | float max_diff) | |
188 | { | |
189 | int i; | |
190 | for (i = 0; i < len; i++) { | |
191 | if (fabsf(a[i] - b[i]) > max_diff) { | |
192 | av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n", | |
193 | i, a[i], b[i], a[i] - b[i]); | |
194 | return -1; | |
195 | } | |
196 | } | |
197 | return 0; | |
198 | } | |
199 | ||
200 | static void fill_double_array(AVLFG *lfg, double *a, int len) | |
201 | { | |
202 | int i; | |
203 | double bmg[2], stddev = 10.0, mean = 0.0; | |
204 | ||
205 | for (i = 0; i < len; i += 2) { | |
206 | av_bmg_get(lfg, bmg); | |
207 | a[i] = bmg[0] * stddev + mean; | |
208 | a[i + 1] = bmg[1] * stddev + mean; | |
209 | } | |
210 | } | |
211 | ||
212 | static int compare_doubles(const double *a, const double *b, int len, | |
213 | double max_diff) | |
214 | { | |
215 | int i; | |
216 | ||
217 | for (i = 0; i < len; i++) { | |
218 | if (fabs(a[i] - b[i]) > max_diff) { | |
219 | av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n", | |
220 | i, a[i], b[i], a[i] - b[i]); | |
221 | return -1; | |
222 | } | |
223 | } | |
224 | return 0; | |
225 | } | |
226 | ||
227 | static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
228 | const float *v1, const float *v2) | |
229 | { | |
230 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
231 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
232 | int ret; | |
233 | ||
234 | cdsp->vector_fmul(cdst, v1, v2, LEN); | |
235 | fdsp->vector_fmul(odst, v1, v2, LEN); | |
236 | ||
237 | if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON)) | |
238 | av_log(NULL, AV_LOG_ERROR, "vector_fmul failed\n"); | |
239 | ||
240 | return ret; | |
241 | } | |
242 | ||
243 | #define ARBITRARY_FMAC_SCALAR_CONST 0.005 | |
244 | static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
245 | const float *v1, const float *src0, float scale) | |
246 | { | |
247 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
248 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
249 | int ret; | |
250 | ||
251 | memcpy(cdst, v1, LEN * sizeof(*v1)); | |
252 | memcpy(odst, v1, LEN * sizeof(*v1)); | |
253 | ||
254 | cdsp->vector_fmac_scalar(cdst, src0, scale, LEN); | |
255 | fdsp->vector_fmac_scalar(odst, src0, scale, LEN); | |
256 | ||
257 | if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST)) | |
258 | av_log(NULL, AV_LOG_ERROR, "vector_fmac_scalar failed\n"); | |
259 | ||
260 | return ret; | |
261 | } | |
262 | ||
263 | static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
264 | const float *v1, float scale) | |
265 | { | |
266 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
267 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
268 | int ret; | |
269 | ||
270 | cdsp->vector_fmul_scalar(cdst, v1, scale, LEN); | |
271 | fdsp->vector_fmul_scalar(odst, v1, scale, LEN); | |
272 | ||
273 | if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON)) | |
274 | av_log(NULL, AV_LOG_ERROR, "vector_fmul_scalar failed\n"); | |
275 | ||
276 | return ret; | |
277 | } | |
278 | ||
279 | static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
280 | const double *v1, double scale) | |
281 | { | |
282 | LOCAL_ALIGNED(32, double, cdst, [LEN]); | |
283 | LOCAL_ALIGNED(32, double, odst, [LEN]); | |
284 | int ret; | |
285 | ||
286 | cdsp->vector_dmul_scalar(cdst, v1, scale, LEN); | |
287 | fdsp->vector_dmul_scalar(odst, v1, scale, LEN); | |
288 | ||
289 | if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON)) | |
290 | av_log(NULL, AV_LOG_ERROR, "vector_dmul_scalar failed\n"); | |
291 | ||
292 | return ret; | |
293 | } | |
294 | ||
295 | #define ARBITRARY_FMUL_WINDOW_CONST 0.008 | |
296 | static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
297 | const float *v1, const float *v2, const float *v3) | |
298 | { | |
299 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
300 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
301 | int ret; | |
302 | ||
303 | cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2); | |
304 | fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2); | |
305 | ||
306 | if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST)) | |
307 | av_log(NULL, AV_LOG_ERROR, "vector_fmul_window failed\n"); | |
308 | ||
309 | return ret; | |
310 | } | |
311 | ||
312 | #define ARBITRARY_FMUL_ADD_CONST 0.005 | |
313 | static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
314 | const float *v1, const float *v2, const float *v3) | |
315 | { | |
316 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
317 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
318 | int ret; | |
319 | ||
320 | cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN); | |
321 | fdsp->vector_fmul_add(odst, v1, v2, v3, LEN); | |
322 | ||
323 | if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST)) | |
324 | av_log(NULL, AV_LOG_ERROR, "vector_fmul_add failed\n"); | |
325 | ||
326 | return ret; | |
327 | } | |
328 | ||
329 | static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
330 | const float *v1, const float *v2) | |
331 | { | |
332 | LOCAL_ALIGNED(32, float, cdst, [LEN]); | |
333 | LOCAL_ALIGNED(32, float, odst, [LEN]); | |
334 | int ret; | |
335 | ||
336 | cdsp->vector_fmul_reverse(cdst, v1, v2, LEN); | |
337 | fdsp->vector_fmul_reverse(odst, v1, v2, LEN); | |
338 | ||
339 | if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON)) | |
340 | av_log(NULL, AV_LOG_ERROR, "vector_fmul_reverse failed\n"); | |
341 | ||
342 | return ret; | |
343 | } | |
344 | ||
345 | static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
346 | const float *v1, const float *v2) | |
347 | { | |
348 | LOCAL_ALIGNED(32, float, cv1, [LEN]); | |
349 | LOCAL_ALIGNED(32, float, cv2, [LEN]); | |
350 | LOCAL_ALIGNED(32, float, ov1, [LEN]); | |
351 | LOCAL_ALIGNED(32, float, ov2, [LEN]); | |
352 | int ret; | |
353 | ||
354 | memcpy(cv1, v1, LEN * sizeof(*v1)); | |
355 | memcpy(cv2, v2, LEN * sizeof(*v2)); | |
356 | memcpy(ov1, v1, LEN * sizeof(*v1)); | |
357 | memcpy(ov2, v2, LEN * sizeof(*v2)); | |
358 | ||
359 | cdsp->butterflies_float(cv1, cv2, LEN); | |
360 | fdsp->butterflies_float(ov1, ov2, LEN); | |
361 | ||
362 | if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) || | |
363 | (ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON))) | |
364 | av_log(NULL, AV_LOG_ERROR, "butterflies_float failed\n"); | |
365 | ||
366 | return ret; | |
367 | } | |
368 | ||
369 | #define ARBITRARY_SCALARPRODUCT_CONST 0.2 | |
370 | static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, | |
371 | const float *v1, const float *v2) | |
372 | { | |
373 | float cprod, oprod; | |
374 | int ret; | |
375 | ||
376 | cprod = cdsp->scalarproduct_float(v1, v2, LEN); | |
377 | oprod = fdsp->scalarproduct_float(v1, v2, LEN); | |
378 | ||
379 | if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST)) | |
380 | av_log(NULL, AV_LOG_ERROR, "scalarproduct_float failed\n"); | |
381 | ||
382 | return ret; | |
383 | } | |
384 | ||
385 | int main(int argc, char **argv) | |
386 | { | |
f6fa7814 | 387 | int ret = 0, seeded = 0; |
2ba45a60 DM |
388 | uint32_t seed; |
389 | AVFloatDSPContext fdsp, cdsp; | |
390 | AVLFG lfg; | |
391 | ||
392 | LOCAL_ALIGNED(32, float, src0, [LEN]); | |
393 | LOCAL_ALIGNED(32, float, src1, [LEN]); | |
394 | LOCAL_ALIGNED(32, float, src2, [LEN]); | |
395 | LOCAL_ALIGNED(32, double, dbl_src0, [LEN]); | |
396 | LOCAL_ALIGNED(32, double, dbl_src1, [LEN]); | |
397 | ||
f6fa7814 DM |
398 | for (;;) { |
399 | int arg = getopt(argc, argv, "s:c:"); | |
400 | if (arg == -1) | |
401 | break; | |
402 | switch (arg) { | |
403 | case 's': | |
404 | seed = strtoul(optarg, NULL, 10); | |
405 | seeded = 1; | |
406 | break; | |
407 | case 'c': | |
408 | { | |
409 | int cpuflags = av_get_cpu_flags(); | |
410 | ||
411 | if (av_parse_cpu_caps(&cpuflags, optarg) < 0) | |
412 | return 1; | |
413 | ||
414 | av_force_cpu_flags(cpuflags); | |
415 | break; | |
416 | } | |
417 | } | |
418 | } | |
419 | if (!seeded) | |
2ba45a60 DM |
420 | seed = av_get_random_seed(); |
421 | ||
f6fa7814 | 422 | av_log(NULL, AV_LOG_INFO, "float_dsp-test: %s %u\n", seeded ? "seed" : "random seed", seed); |
2ba45a60 DM |
423 | |
424 | av_lfg_init(&lfg, seed); | |
425 | ||
426 | fill_float_array(&lfg, src0, LEN); | |
427 | fill_float_array(&lfg, src1, LEN); | |
428 | fill_float_array(&lfg, src2, LEN); | |
429 | ||
430 | fill_double_array(&lfg, dbl_src0, LEN); | |
431 | fill_double_array(&lfg, dbl_src1, LEN); | |
432 | ||
433 | avpriv_float_dsp_init(&fdsp, 1); | |
434 | av_set_cpu_flags_mask(0); | |
435 | avpriv_float_dsp_init(&cdsp, 1); | |
436 | ||
437 | if (test_vector_fmul(&fdsp, &cdsp, src0, src1)) | |
438 | ret -= 1 << 0; | |
439 | if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0])) | |
440 | ret -= 1 << 1; | |
441 | if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0])) | |
442 | ret -= 1 << 2; | |
443 | if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2)) | |
444 | ret -= 1 << 3; | |
445 | if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2)) | |
446 | ret -= 1 << 4; | |
447 | if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1)) | |
448 | ret -= 1 << 5; | |
449 | if (test_butterflies_float(&fdsp, &cdsp, src0, src1)) | |
450 | ret -= 1 << 6; | |
451 | if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1)) | |
452 | ret -= 1 << 7; | |
453 | if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0])) | |
454 | ret -= 1 << 8; | |
455 | ||
456 | return ret; | |
457 | } | |
458 | ||
459 | #endif /* TEST */ |