]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at) | |
3 | * | |
4 | * This file is part of libswresample | |
5 | * | |
6 | * libswresample is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * libswresample is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with libswresample; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/x86/cpu.h" | |
22 | #include "libswresample/swresample_internal.h" | |
23 | ||
24 | #define D(type, simd) \ | |
25 | mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\ | |
26 | mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd; | |
27 | ||
28 | D(float, sse) | |
29 | D(float, avx) | |
30 | D(int16, mmx) | |
31 | D(int16, sse2) | |
32 | ||
33 | av_cold void swri_rematrix_init_x86(struct SwrContext *s){ | |
34 | #if HAVE_YASM | |
35 | int mm_flags = av_get_cpu_flags(); | |
36 | int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout); | |
37 | int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout); | |
38 | int num = nb_in * nb_out; | |
39 | int i,j; | |
40 | ||
41 | s->mix_1_1_simd = NULL; | |
42 | s->mix_2_1_simd = NULL; | |
43 | ||
44 | if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ | |
45 | if(EXTERNAL_MMX(mm_flags)) { | |
46 | s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx; | |
47 | s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx; | |
48 | } | |
49 | if(EXTERNAL_SSE2(mm_flags)) { | |
50 | s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2; | |
51 | s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2; | |
52 | } | |
53 | s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t)); | |
54 | s->native_simd_one = av_mallocz(2 * sizeof(int16_t)); | |
55 | for(i=0; i<nb_out; i++){ | |
56 | int sh = 0; | |
57 | for(j=0; j<nb_in; j++) | |
58 | sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j])); | |
59 | sh = FFMAX(av_log2(sh) - 14, 0); | |
60 | for(j=0; j<nb_in; j++) { | |
61 | ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh; | |
62 | ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] = | |
63 | ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; | |
64 | } | |
65 | } | |
66 | ((int16_t*)s->native_simd_one)[1] = 14; | |
67 | ((int16_t*)s->native_simd_one)[0] = 16384; | |
68 | } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ | |
69 | if(EXTERNAL_SSE(mm_flags)) { | |
70 | s->mix_1_1_simd = ff_mix_1_1_a_float_sse; | |
71 | s->mix_2_1_simd = ff_mix_2_1_a_float_sse; | |
72 | } | |
73 | if(EXTERNAL_AVX(mm_flags)) { | |
74 | s->mix_1_1_simd = ff_mix_1_1_a_float_avx; | |
75 | s->mix_2_1_simd = ff_mix_2_1_a_float_avx; | |
76 | } | |
77 | s->native_simd_matrix = av_mallocz(num * sizeof(float)); | |
78 | memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); | |
79 | s->native_simd_one = av_mallocz(sizeof(float)); | |
80 | memcpy(s->native_simd_one, s->native_one, sizeof(float)); | |
81 | } | |
82 | #endif | |
83 | } |