| 1 | /* |
| 2 | * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at) |
| 3 | * |
| 4 | * This file is part of libswresample |
| 5 | * |
| 6 | * libswresample is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * libswresample is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with libswresample; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "libavutil/x86/cpu.h" |
| 22 | #include "libswresample/swresample_internal.h" |
| 23 | |
| 24 | #define D(type, simd) \ |
| 25 | mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\ |
| 26 | mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd; |
| 27 | |
| 28 | D(float, sse) |
| 29 | D(float, avx) |
| 30 | D(int16, mmx) |
| 31 | D(int16, sse2) |
| 32 | |
| 33 | av_cold void swri_rematrix_init_x86(struct SwrContext *s){ |
| 34 | #if HAVE_YASM |
| 35 | int mm_flags = av_get_cpu_flags(); |
| 36 | int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout); |
| 37 | int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout); |
| 38 | int num = nb_in * nb_out; |
| 39 | int i,j; |
| 40 | |
| 41 | s->mix_1_1_simd = NULL; |
| 42 | s->mix_2_1_simd = NULL; |
| 43 | |
| 44 | if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ |
| 45 | if(EXTERNAL_MMX(mm_flags)) { |
| 46 | s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx; |
| 47 | s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx; |
| 48 | } |
| 49 | if(EXTERNAL_SSE2(mm_flags)) { |
| 50 | s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2; |
| 51 | s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2; |
| 52 | } |
| 53 | s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t)); |
| 54 | s->native_simd_one = av_mallocz(2 * sizeof(int16_t)); |
| 55 | for(i=0; i<nb_out; i++){ |
| 56 | int sh = 0; |
| 57 | for(j=0; j<nb_in; j++) |
| 58 | sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j])); |
| 59 | sh = FFMAX(av_log2(sh) - 14, 0); |
| 60 | for(j=0; j<nb_in; j++) { |
| 61 | ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh; |
| 62 | ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] = |
| 63 | ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; |
| 64 | } |
| 65 | } |
| 66 | ((int16_t*)s->native_simd_one)[1] = 14; |
| 67 | ((int16_t*)s->native_simd_one)[0] = 16384; |
| 68 | } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ |
| 69 | if(EXTERNAL_SSE(mm_flags)) { |
| 70 | s->mix_1_1_simd = ff_mix_1_1_a_float_sse; |
| 71 | s->mix_2_1_simd = ff_mix_2_1_a_float_sse; |
| 72 | } |
| 73 | if(EXTERNAL_AVX(mm_flags)) { |
| 74 | s->mix_1_1_simd = ff_mix_1_1_a_float_avx; |
| 75 | s->mix_2_1_simd = ff_mix_2_1_a_float_avx; |
| 76 | } |
| 77 | s->native_simd_matrix = av_mallocz(num * sizeof(float)); |
| 78 | memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); |
| 79 | s->native_simd_one = av_mallocz(sizeof(float)); |
| 80 | memcpy(s->native_simd_one, s->native_one, sizeof(float)); |
| 81 | } |
| 82 | #endif |
| 83 | } |