9bbdf96d12cb9ba5af30f84eaaac28b09f875fed
/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
39 static void get_pixels_altivec(int16_t *restrict block
, const uint8_t *pixels
,
43 vector
unsigned char perm
=
44 (vector
unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
45 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
46 const vector
unsigned char zero
=
47 (const vector
unsigned char) vec_splat_u8(0);
49 for (i
= 0; i
< 8; i
++) {
50 /* Read potentially unaligned pixels.
51 * We're reading 16 pixels, and actually only want 8,
52 * but we simply ignore the extras. */
53 vector
unsigned char bytes
= vec_vsx_ld(0, pixels
);
55 // Convert the bytes into shorts.
56 //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
57 vector
signed short shorts
= (vector
signed short) vec_perm(bytes
, zero
, perm
);
59 // Save the data to the block, we assume the block is 16-byte aligned.
60 vec_vsx_st(shorts
, i
* 16, (vector
signed short *) block
);
66 static void get_pixels_altivec(int16_t *restrict block
, const uint8_t *pixels
,
70 vector
unsigned char perm
= vec_lvsl(0, pixels
);
71 const vector
unsigned char zero
=
72 (const vector
unsigned char) vec_splat_u8(0);
74 for (i
= 0; i
< 8; i
++) {
75 /* Read potentially unaligned pixels.
76 * We're reading 16 pixels, and actually only want 8,
77 * but we simply ignore the extras. */
78 vector
unsigned char pixl
= vec_ld(0, pixels
);
79 vector
unsigned char pixr
= vec_ld(7, pixels
);
80 vector
unsigned char bytes
= vec_perm(pixl
, pixr
, perm
);
82 // Convert the bytes into shorts.
83 vector
signed short shorts
= (vector
signed short) vec_mergeh(zero
,
86 // Save the data to the block, we assume the block is 16-byte aligned.
87 vec_st(shorts
, i
* 16, (vector
signed short *) block
);
#if HAVE_VSX
/**
 * Store the element-wise difference s1 - s2 of two 8x8 pixel areas as
 * 16-bit values (VSX variant).
 *
 * @param block  destination; eight int16_t are written per row for eight
 *               rows, and the buffer is assumed to be 16-byte aligned
 * @param s1     first row of the minuend pixels, may be unaligned
 * @param s2     first row of the subtrahend pixels, may be unaligned
 * @param stride byte distance between consecutive rows of s1 and of s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    int row;

    for (row = 0; row < 8; row++) {
        /* Read potentially unaligned pixels. Sixteen bytes are fetched per
         * source although only the first eight are used; the extras are
         * simply ignored. */
        vector unsigned char src1 = vec_vsx_ld(0, s1);
        vector unsigned char src2 = vec_vsx_ld(0, s2);

        /* Widen the first eight bytes of each row to shorts by interleaving
         * them with zero bytes. The operand order is the reverse of the
         * non-VSX variant below.
         * NOTE(review): presumably because the VSX build targets
         * little-endian -- confirm. */
        vector signed short wide1 =
            (vector signed short) vec_mergeh(src1, zero);
        vector signed short wide2 =
            (vector signed short) vec_mergeh(src2, zero);

        /* Subtract and save the row; block is assumed 16-byte aligned. */
        vec_vsx_st(vec_sub(wide1, wide2), 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#else
/**
 * Store the element-wise difference s1 - s2 of two 8x8 pixel areas as
 * 16-bit values (plain AltiVec variant).
 *
 * @param block  destination; eight int16_t are written per row for eight
 *               rows, and the buffer is assumed to be 16-byte aligned
 * @param s1     first row of the minuend pixels, may be unaligned
 * @param s2     first row of the subtrahend pixels, may be unaligned
 * @param stride byte distance between consecutive rows of s1 and of s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    /* The alignment permutes are derived once from the first row and reused
     * for every row, so all rows are presumed to share the first row's
     * 16-byte misalignment (i.e. stride is a multiple of 16).
     * NOTE(review): confirm against the callers. */
    const vector unsigned char perm1 = vec_lvsl(0, s1);
    const vector unsigned char perm2 = vec_lvsl(0, s2);
    const vector unsigned char zero  =
        (const vector unsigned char) vec_splat_u8(0);
    int row;

    for (row = 0; row < 8; row++) {
        /* Read potentially unaligned pixels. vec_ld only loads aligned
         * quadwords, so a row is assembled from the quadword holding its
         * first byte and the quadword holding its eighth byte. Using offset
         * 7 (rather than 15) means the following quadword is touched only
         * when the eight wanted bytes really straddle it, which avoids
         * reading past the data that is actually needed. */
        vector unsigned char lo  = vec_ld(0, s1);
        vector unsigned char hi  = vec_ld(7, s1);
        vector unsigned char src = vec_perm(lo, hi, perm1);

        /* Convert the first eight bytes into shorts. */
        vector signed short wide1 =
            (vector signed short) vec_mergeh(zero, src);
        vector signed short wide2;

        /* Do the same for the second block of pixels. */
        lo    = vec_ld(0, s2);
        hi    = vec_ld(7, s2);
        src   = vec_perm(lo, hi, perm2);
        wide2 = (vector signed short) vec_mergeh(zero, src);

        /* Subtract and save the row; block is assumed 16-byte aligned. */
        vec_st(vec_sub(wide1, wide2), 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}

#endif /* HAVE_VSX */
232 #endif /* HAVE_ALTIVEC */
234 av_cold
void ff_pixblockdsp_init_ppc(PixblockDSPContext
*c
,
235 AVCodecContext
*avctx
,
236 unsigned high_bit_depth
)
239 if (!PPC_ALTIVEC(av_get_cpu_flags()))
242 c
->diff_pixels
= diff_pixels_altivec
;
244 if (!high_bit_depth
) {
245 c
->get_pixels
= get_pixels_altivec
;
247 #endif /* HAVE_ALTIVEC */