| 1 | /* |
| 2 | * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "config.h" |
| 22 | #include "libavutil/attributes.h" |
| 23 | #include "libavutil/cpu.h" |
| 24 | #include "libavutil/mem.h" |
| 25 | #include "libavutil/ppc/cpu.h" |
| 26 | #include "libavutil/ppc/util_altivec.h" |
| 27 | #include "libavcodec/fmtconvert.h" |
| 28 | |
| 29 | #if HAVE_ALTIVEC |
| 30 | |
| 31 | static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src, |
| 32 | float mul, int len) |
| 33 | { |
| 34 | union { |
| 35 | vector float v; |
| 36 | float s[4]; |
| 37 | } mul_u; |
| 38 | int i; |
| 39 | vector float src1, src2, dst1, dst2, mul_v, zero; |
| 40 | |
| 41 | zero = (vector float)vec_splat_u32(0); |
| 42 | mul_u.s[0] = mul; |
| 43 | mul_v = vec_splat(mul_u.v, 0); |
| 44 | |
| 45 | for (i = 0; i < len; i += 8) { |
| 46 | src1 = vec_ctf(vec_ld(0, src+i), 0); |
| 47 | src2 = vec_ctf(vec_ld(16, src+i), 0); |
| 48 | dst1 = vec_madd(src1, mul_v, zero); |
| 49 | dst2 = vec_madd(src2, mul_v, zero); |
| 50 | vec_st(dst1, 0, dst+i); |
| 51 | vec_st(dst2, 16, dst+i); |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | |
| 56 | static vector signed short float_to_int16_one_altivec(const float *src) |
| 57 | { |
| 58 | vector float s0 = vec_ld(0, src); |
| 59 | vector float s1 = vec_ld(16, src); |
| 60 | vector signed int t0 = vec_cts(s0, 0); |
| 61 | vector signed int t1 = vec_cts(s1, 0); |
| 62 | return vec_packs(t0,t1); |
| 63 | } |
| 64 | |
| 65 | static void float_to_int16_altivec(int16_t *dst, const float *src, long len) |
| 66 | { |
| 67 | int i; |
| 68 | vector signed short d0, d1, d; |
| 69 | vector unsigned char align; |
| 70 | if (((long)dst) & 15) { //FIXME |
| 71 | for (i = 0; i < len - 7; i += 8) { |
| 72 | d0 = vec_ld(0, dst+i); |
| 73 | d = float_to_int16_one_altivec(src + i); |
| 74 | d1 = vec_ld(15, dst+i); |
| 75 | d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); |
| 76 | align = vec_lvsr(0, dst + i); |
| 77 | d0 = vec_perm(d1, d, align); |
| 78 | d1 = vec_perm(d, d1, align); |
| 79 | vec_st(d0, 0, dst + i); |
| 80 | vec_st(d1, 15, dst + i); |
| 81 | } |
| 82 | } else { |
| 83 | for (i = 0; i < len - 7; i += 8) { |
| 84 | d = float_to_int16_one_altivec(src + i); |
| 85 | vec_st(d, 0, dst + i); |
| 86 | } |
| 87 | } |
| 88 | } |
| 89 | |
/*
 * Store 16-bit element `elem` of vector `v` at `dst` using vec_ste, then
 * advance `dst` by `inc` elements. The element is first replicated across
 * every lane with vec_splat before the single-element store.
 * `dst` is modified, so it must be a modifiable pointer lvalue; `elem` is
 * passed straight to vec_splat.
 */
#define VSTE_INC(dst, v, elem, inc) do {                              \
        vector signed short splat_ = vec_splat((v), (elem));          \
        vec_ste(splat_, 0, (dst));                                    \
        (dst) += (inc);                                               \
    } while (0)
| 95 | |
| 96 | static void float_to_int16_stride_altivec(int16_t *dst, const float *src, |
| 97 | long len, int stride) |
| 98 | { |
| 99 | int i; |
| 100 | vector signed short d; |
| 101 | |
| 102 | for (i = 0; i < len - 7; i += 8) { |
| 103 | d = float_to_int16_one_altivec(src + i); |
| 104 | VSTE_INC(dst, d, 0, stride); |
| 105 | VSTE_INC(dst, d, 1, stride); |
| 106 | VSTE_INC(dst, d, 2, stride); |
| 107 | VSTE_INC(dst, d, 3, stride); |
| 108 | VSTE_INC(dst, d, 4, stride); |
| 109 | VSTE_INC(dst, d, 5, stride); |
| 110 | VSTE_INC(dst, d, 6, stride); |
| 111 | VSTE_INC(dst, d, 7, stride); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | static void float_to_int16_interleave_altivec(int16_t *dst, const float **src, |
| 116 | long len, int channels) |
| 117 | { |
| 118 | int i; |
| 119 | vector signed short d0, d1, d2, c0, c1, t0, t1; |
| 120 | vector unsigned char align; |
| 121 | |
| 122 | if (channels == 1) |
| 123 | float_to_int16_altivec(dst, src[0], len); |
| 124 | else { |
| 125 | if (channels == 2) { |
| 126 | if (((long)dst) & 15) { |
| 127 | for (i = 0; i < len - 7; i += 8) { |
| 128 | d0 = vec_ld(0, dst + i); |
| 129 | t0 = float_to_int16_one_altivec(src[0] + i); |
| 130 | d1 = vec_ld(31, dst + i); |
| 131 | t1 = float_to_int16_one_altivec(src[1] + i); |
| 132 | c0 = vec_mergeh(t0, t1); |
| 133 | c1 = vec_mergel(t0, t1); |
| 134 | d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); |
| 135 | align = vec_lvsr(0, dst + i); |
| 136 | d0 = vec_perm(d2, c0, align); |
| 137 | d1 = vec_perm(c0, c1, align); |
| 138 | vec_st(d0, 0, dst + i); |
| 139 | d0 = vec_perm(c1, d2, align); |
| 140 | vec_st(d1, 15, dst + i); |
| 141 | vec_st(d0, 31, dst + i); |
| 142 | dst += 8; |
| 143 | } |
| 144 | } else { |
| 145 | for (i = 0; i < len - 7; i += 8) { |
| 146 | t0 = float_to_int16_one_altivec(src[0] + i); |
| 147 | t1 = float_to_int16_one_altivec(src[1] + i); |
| 148 | d0 = vec_mergeh(t0, t1); |
| 149 | d1 = vec_mergel(t0, t1); |
| 150 | vec_st(d0, 0, dst + i); |
| 151 | vec_st(d1, 16, dst + i); |
| 152 | dst += 8; |
| 153 | } |
| 154 | } |
| 155 | } else { |
| 156 | for (i = 0; i < channels; i++) |
| 157 | float_to_int16_stride_altivec(dst + i, src[i], len, channels); |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | #endif /* HAVE_ALTIVEC */ |
| 163 | |
| 164 | av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c, |
| 165 | AVCodecContext *avctx) |
| 166 | { |
| 167 | #if HAVE_ALTIVEC |
| 168 | if (!PPC_ALTIVEC(av_get_cpu_flags())) |
| 169 | return; |
| 170 | |
| 171 | c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec; |
| 172 | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
| 173 | c->float_to_int16 = float_to_int16_altivec; |
| 174 | c->float_to_int16_interleave = float_to_int16_interleave_altivec; |
| 175 | } |
| 176 | #endif /* HAVE_ALTIVEC */ |
| 177 | } |