/*
 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "util_altivec.h"
#include "float_dsp_altivec.h"
24 void ff_vector_fmul_altivec(float *dst
, const float *src0
, const float *src1
,
28 vector
float d0
, d1
, s
, zero
= (vector
float)vec_splat_u32(0);
29 for (i
= 0; i
< len
- 7; i
+= 8) {
30 d0
= vec_ld( 0, src0
+ i
);
31 s
= vec_ld( 0, src1
+ i
);
32 d1
= vec_ld(16, src0
+ i
);
33 d0
= vec_madd(d0
, s
, zero
);
34 d1
= vec_madd(d1
, vec_ld(16, src1
+ i
), zero
);
35 vec_st(d0
, 0, dst
+ i
);
36 vec_st(d1
, 16, dst
+ i
);
40 void ff_vector_fmul_window_altivec(float *dst
, const float *src0
,
41 const float *src1
, const float *win
, int len
)
43 vector
float zero
, t0
, t1
, s0
, s1
, wi
, wj
;
44 const vector
unsigned char reverse
= vcprm(3, 2, 1, 0);
51 zero
= (vector
float)vec_splat_u32(0);
53 for (i
= -len
* 4, j
= len
* 4 - 16; i
< 0; i
+= 16, j
-= 16) {
59 s1
= vec_perm(s1
, s1
, reverse
);
60 wj
= vec_perm(wj
, wj
, reverse
);
62 t0
= vec_madd(s0
, wj
, zero
);
63 t0
= vec_nmsub(s1
, wi
, t0
);
64 t1
= vec_madd(s0
, wi
, zero
);
65 t1
= vec_madd(s1
, wj
, t1
);
66 t1
= vec_perm(t1
, t1
, reverse
);
73 void ff_vector_fmul_add_altivec(float *dst
, const float *src0
,
74 const float *src1
, const float *src2
,
78 vector
float d
, ss0
, ss1
, ss2
, t0
, t1
, edges
;
80 for (i
= 0; i
< len
- 3; i
+= 4) {
81 t0
= vec_ld(0, dst
+ i
);
82 t1
= vec_ld(15, dst
+ i
);
83 ss0
= vec_ld(0, src0
+ i
);
84 ss1
= vec_ld(0, src1
+ i
);
85 ss2
= vec_ld(0, src2
+ i
);
86 edges
= vec_perm(t1
, t0
, vcprm(0, 1, 2, 3));
87 d
= vec_madd(ss0
, ss1
, ss2
);
88 t1
= vec_perm(d
, edges
, vcprm(s0
,s1
,s2
,s3
));
89 t0
= vec_perm(edges
, d
, vcprm(s0
,s1
,s2
,s3
));
90 vec_st(t1
, 15, dst
+ i
);
91 vec_st(t0
, 0, dst
+ i
);
95 void ff_vector_fmul_reverse_altivec(float *dst
, const float *src0
,
96 const float *src1
, int len
)
99 vector
float d
, s0
, s1
, h0
, l0
,
100 s2
, s3
, zero
= (vector
float) vec_splat_u32(0);
103 for(i
= 0; i
< len
- 7; i
+= 8) {
104 s1
= vec_ld(0, src1
- i
); // [a,b,c,d]
105 s0
= vec_ld(0, src0
+ i
);
106 l0
= vec_mergel(s1
, s1
); // [c,c,d,d]
107 s3
= vec_ld(-16, src1
- i
);
108 h0
= vec_mergeh(s1
, s1
); // [a,a,b,b]
109 s2
= vec_ld(16, src0
+ i
);
110 s1
= vec_mergeh(vec_mergel(l0
, h0
), // [d,b,d,b]
111 vec_mergeh(l0
, h0
)); // [c,a,c,a]
113 l0
= vec_mergel(s3
, s3
);
114 d
= vec_madd(s0
, s1
, zero
);
115 h0
= vec_mergeh(s3
, s3
);
116 vec_st(d
, 0, dst
+ i
);
117 s3
= vec_mergeh(vec_mergel(l0
, h0
),
119 d
= vec_madd(s2
, s3
, zero
);
120 vec_st(d
, 16, dst
+ i
);