/*
 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavutil/attributes.h"
23 #include "libavutil/cpu.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/ppc/cpu.h"
26 #include "libavutil/ppc/util_altivec.h"
27 #include "libavcodec/fmtconvert.h"
/*
 * Convert 32-bit integers from src to floats, scale them by mul_v and
 * write the products to dst, eight elements per loop iteration.
 *
 * Each iteration loads two vectors from src + i (byte offsets 0 and 16),
 * converts them with vec_ctf(..., 0), multiplies by mul_v through
 * vec_madd(x, mul_v, zero) and stores the two results to dst + i at byte
 * offsets 0 and 16.
 *
 * NOTE(review): the end of the parameter list and the declarations of
 * i, len and mul_u are not visible in this excerpt. mul_v is a splat of
 * element 0 of mul_u.v, which presumably holds the scalar multiplier --
 * confirm against the full file.
 * NOTE(review): the loop runs while i < len in steps of 8 and uses plain
 * vec_ld/vec_st, so len is presumably a multiple of 8 and src/dst are
 * presumably 16-byte aligned -- confirm against the callers.
 */
31 static void int32_to_float_fmul_scalar_altivec(float *dst
, const int32_t *src
,
39 vector
float src1
, src2
, dst1
, dst2
, mul_v
, zero
;
/* zero is the addend handed to vec_madd below, so the multiply-add acts
 * as a plain multiply. */
41 zero
= (vector
float)vec_splat_u32(0);
/* Replicate element 0 of mul_u.v into every lane of mul_v. */
43 mul_v
= vec_splat(mul_u
.v
, 0);
/* Two vectors (eight elements) are converted and scaled per iteration. */
45 for (i
= 0; i
< len
; i
+= 8) {
46 src1
= vec_ctf(vec_ld(0, src
+i
), 0);
47 src2
= vec_ctf(vec_ld(16, src
+i
), 0);
48 dst1
= vec_madd(src1
, mul_v
, zero
);
49 dst2
= vec_madd(src2
, mul_v
, zero
);
50 vec_st(dst1
, 0, dst
+i
);
51 vec_st(dst2
, 16, dst
+i
);
56 static vector
signed short float_to_int16_one_altivec(const float *src
)
58 vector
float s0
= vec_ld(0, src
);
59 vector
float s1
= vec_ld(16, src
);
60 vector
signed int t0
= vec_cts(s0
, 0);
61 vector
signed int t1
= vec_cts(s1
, 0);
62 return vec_packs(t0
,t1
);
/*
 * Convert floats from src to 16-bit integers in dst, eight at a time,
 * using float_to_int16_one_altivec() for each group of eight.
 *
 * Two store loops are visible:
 *  - one guarded by a test of the low four bits of the dst address
 *    (dst not 16-byte aligned), which reads the vectors around dst + i,
 *    merges the converted values into them with vec_lvsl/vec_lvsr driven
 *    permutes, and writes two vectors back;
 *  - one that stores each converted vector straight to dst + i.
 * Both loops run while i < len - 7, so a trailing group of fewer than
 * eight elements is not converted.
 *
 * NOTE(review): the declaration of i and the lines between the two loops
 * are not visible in this excerpt; the second loop is presumably the
 * else branch taken when dst is aligned -- confirm against the full file.
 * NOTE(review): src + i is read with vec_ld inside the helper, so src is
 * presumably required to be 16-byte aligned -- confirm.
 */
65 static void float_to_int16_altivec(int16_t *dst
, const float *src
, long len
)
68 vector
signed short d0
, d1
, d
;
69 vector
unsigned char align
;
70 if (((long)dst
) & 15) { //FIXME
71 for (i
= 0; i
< len
- 7; i
+= 8) {
/* d0/d1: the existing vectors covering the first and the last byte of
 * the 16 bytes about to be written at dst + i. */
72 d0
= vec_ld(0, dst
+i
);
73 d
= float_to_int16_one_altivec(src
+ i
);
74 d1
= vec_ld(15, dst
+i
);
/* NOTE(review): this looks like the usual AltiVec misaligned-store
 * sequence: collect the surrounding bytes that must survive, then
 * rotate the new data into place with the vec_lvsr mask -- confirm. */
75 d1
= vec_perm(d1
, d0
, vec_lvsl(0, dst
+ i
));
76 align
= vec_lvsr(0, dst
+ i
);
77 d0
= vec_perm(d1
, d
, align
);
78 d1
= vec_perm(d
, d1
, align
);
79 vec_st(d0
, 0, dst
+ i
);
80 vec_st(d1
, 15, dst
+ i
);
/* Direct path: store each converted vector to dst + i as is. */
83 for (i
= 0; i
< len
- 7; i
+= 8) {
84 d
= float_to_int16_one_altivec(src
+ i
);
85 vec_st(d
, 0, dst
+ i
);
/*
 * VSTE_INC(dst, v, elem, inc): begins by replicating element elem of v
 * into every lane of a local vector s.
 *
 * NOTE(review): the remainder of the macro body is not visible in this
 * excerpt. Judging by the name and the call sites it presumably stores
 * one element of s to dst and then advances dst by inc -- confirm against
 * the full file.
 */
90 #define VSTE_INC(dst, v, elem, inc) do { \
91 vector signed short s = vec_splat(v, elem); \
/*
 * Convert floats from src to 16-bit integers, eight per iteration, and
 * hand the eight results to VSTE_INC one element at a time
 * (elements 0..7, each with the same stride argument).
 *
 * The loop runs while i < len - 7, so a trailing group of fewer than
 * eight elements is not converted.
 *
 * NOTE(review): the end of the parameter list and the declaration of i
 * are not visible in this excerpt; len and stride are presumably the
 * remaining parameters -- confirm against the full file.
 */
96 static void float_to_int16_stride_altivec(int16_t *dst
, const float *src
,
100 vector
signed short d
;
102 for (i
= 0; i
< len
- 7; i
+= 8) {
103 d
= float_to_int16_one_altivec(src
+ i
);
/* One VSTE_INC per element of d, in element order. */
104 VSTE_INC(dst
, d
, 0, stride
);
105 VSTE_INC(dst
, d
, 1, stride
);
106 VSTE_INC(dst
, d
, 2, stride
);
107 VSTE_INC(dst
, d
, 3, stride
);
108 VSTE_INC(dst
, d
, 4, stride
);
109 VSTE_INC(dst
, d
, 5, stride
);
110 VSTE_INC(dst
, d
, 6, stride
);
111 VSTE_INC(dst
, d
, 7, stride
);
/*
 * Convert several float source arrays (src[0], src[1], ...) to 16-bit
 * integers and write them to a single dst buffer.
 *
 * Three pieces of work are visible:
 *  - a call to float_to_int16_altivec(dst, src[0], len);
 *  - a pair of loops that convert eight floats each from src[0] and
 *    src[1], combine them with vec_mergeh/vec_mergel (alternating
 *    elements of t0 and t1) and store the result; the first loop is
 *    guarded by a test of the low four bits of the dst address and goes
 *    through vec_lvsl/vec_lvsr permutes, the second stores directly;
 *  - a loop over i < channels that calls
 *    float_to_int16_stride_altivec(dst + i, src[i], len, channels).
 *
 * NOTE(review): the declaration of i and the conditions that choose
 * between these three pieces are not visible in this excerpt; they are
 * presumably channels == 1, channels == 2 and "anything else" -- confirm
 * against the full file.
 * NOTE(review): in both two-source loops i advances by 8 and every store
 * is addressed from dst + i, yet each iteration produces two full vectors
 * (16 int16 values). Consecutive iterations therefore write overlapping
 * regions of dst; dst + 2 * i looks like what was intended -- confirm and
 * fix in the full file.
 */
115 static void float_to_int16_interleave_altivec(int16_t *dst
, const float **src
,
116 long len
, int channels
)
119 vector
signed short d0
, d1
, d2
, c0
, c1
, t0
, t1
;
120 vector
unsigned char align
;
123 float_to_int16_altivec(dst
, src
[0], len
);
126 if (((long)dst
) & 15) {
127 for (i
= 0; i
< len
- 7; i
+= 8) {
/* d0/d1: existing vectors covering the first and the last byte of the
 * 32 bytes about to be written at dst + i. */
128 d0
= vec_ld(0, dst
+ i
);
129 t0
= float_to_int16_one_altivec(src
[0] + i
);
130 d1
= vec_ld(31, dst
+ i
);
131 t1
= float_to_int16_one_altivec(src
[1] + i
);
/* c0/c1: t0 and t1 merged element by element (high and low halves). */
132 c0
= vec_mergeh(t0
, t1
);
133 c1
= vec_mergel(t0
, t1
);
134 d2
= vec_perm(d1
, d0
, vec_lvsl(0, dst
+ i
));
135 align
= vec_lvsr(0, dst
+ i
);
136 d0
= vec_perm(d2
, c0
, align
);
137 d1
= vec_perm(c0
, c1
, align
);
138 vec_st(d0
, 0, dst
+ i
);
139 d0
= vec_perm(c1
, d2
, align
);
140 vec_st(d1
, 15, dst
+ i
);
141 vec_st(d0
, 31, dst
+ i
);
/* Direct path: merge and store without the permute sequence. */
145 for (i
= 0; i
< len
- 7; i
+= 8) {
146 t0
= float_to_int16_one_altivec(src
[0] + i
);
147 t1
= float_to_int16_one_altivec(src
[1] + i
);
148 d0
= vec_mergeh(t0
, t1
);
149 d1
= vec_mergel(t0
, t1
);
150 vec_st(d0
, 0, dst
+ i
);
151 vec_st(d1
, 16, dst
+ i
);
/* One strided pass per source array: array i starts at dst + i and the
 * stride argument is channels. */
156 for (i
= 0; i
< channels
; i
++)
157 float_to_int16_stride_altivec(dst
+ i
, src
[i
], len
, channels
);
162 #endif /* HAVE_ALTIVEC */
/*
 * Install the AltiVec routines of this file into the function pointers
 * of the FmtConvertContext c.
 *
 * Visible steps:
 *  - a check of PPC_ALTIVEC(av_get_cpu_flags());
 *  - c->int32_to_float_fmul_scalar is set to the AltiVec version;
 *  - only when CODEC_FLAG_BITEXACT is not set in avctx->flags,
 *    c->float_to_int16 and c->float_to_int16_interleave are set to the
 *    AltiVec versions as well.
 *
 * NOTE(review): the statement controlled by the CPU-flag check and the
 * opening #if that matches the trailing #endif are not visible in this
 * excerpt; the check is presumably an early return when AltiVec is not
 * available -- confirm against the full file.
 */
164 av_cold
void ff_fmt_convert_init_ppc(FmtConvertContext
*c
,
165 AVCodecContext
*avctx
)
168 if (!PPC_ALTIVEC(av_get_cpu_flags()))
171 c
->int32_to_float_fmul_scalar
= int32_to_float_fmul_scalar_altivec
;
/* The float -> int16 routines are skipped when bit-exact output is
 * requested. */
172 if (!(avctx
->flags
& CODEC_FLAG_BITEXACT
)) {
173 c
->float_to_int16
= float_to_int16_altivec
;
174 c
->float_to_int16_interleave
= float_to_int16_interleave_altivec
;
176 #endif /* HAVE_ALTIVEC */