9bbdf96d12cb9ba5af30f84eaaac28b09f875fed
[deb_ffmpeg.git] / pixblockdsp.c
1 /*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
27
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
35
36 #if HAVE_ALTIVEC
37
38 #if HAVE_VSX
/**
 * Load an 8x8 block of 8-bit pixels and widen every pixel to 16 bits
 * (variant built when HAVE_VSX is set).
 *
 * @param block     destination for 64 int16_t values, written row by row
 * @param pixels    top-left source pixel; rows are fetched with vec_vsx_ld(),
 *                  so the address may be unaligned
 * @param line_size distance in bytes between two consecutive source rows
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    /* Interleave pattern for vec_perm(): indices 0x00..0x07 select the first
     * eight pixel bytes, indices 0x10..0x17 select bytes of the second
     * operand (all zeros), so each pixel byte is followed by a zero byte.
     * NOTE(review): a pixel in the first byte of each 16-bit element only
     * equals the pixel value when that byte is the least significant one;
     * confirm which byte orders this path is built for. */
    const vector unsigned char interleave =
        (vector unsigned char) { 0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
                                 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17 };
    const vector unsigned char zeros =
        (const vector unsigned char) vec_splat_u8(0);
    const uint8_t *src = pixels;
    int row = 0;

    while (row < 8) {
        /* A whole 16-byte vector is fetched although only the first
         * 8 pixels of the row are used; the rest is discarded. */
        vector unsigned char raw = vec_vsx_ld(0, src);

        /* Widen the 8 pixels to 8 shorts. */
        vector signed short widened =
            (vector signed short) vec_perm(raw, zeros, interleave);

        /* One output row is 8 shorts, i.e. 16 bytes. */
        vec_vsx_st(widened, row * 16, (vector signed short *) block);

        src += line_size;
        row++;
    }
}
65 #else
/**
 * Load an 8x8 block of 8-bit pixels and widen every pixel to 16 bits
 * (classic AltiVec variant: aligned loads followed by a permute).
 *
 * @param block     destination for 64 int16_t values, written row by row;
 *                  must be 16-byte aligned because it is stored with vec_st()
 * @param pixels    top-left source pixel, any alignment
 * @param line_size distance in bytes between two consecutive source rows,
 *                  any value (it does not have to be a multiple of 16)
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* The realignment permute is derived from the low four address bits
         * of the row being read, so it must be recomputed for every row.
         * Computing it once before the loop is only correct when line_size
         * is a multiple of 16. */
        vector unsigned char perm = vec_lvsl(0, pixels);

        /* vec_ld() ignores the low four address bits and fetches the
         * enclosing aligned 16-byte block. Only 8 pixels are wanted, so the
         * second load targets the last wanted byte (offset 7); anything
         * beyond it is ignored. */
        vector unsigned char pixl  = vec_ld(0, pixels);
        vector unsigned char pixr  = vec_ld(7, pixels);
        vector unsigned char bytes = vec_perm(pixl, pixr, perm);

        /* Widen the 8 pixels to 8 shorts: each 16-bit element is a zero
         * byte followed by the pixel byte. */
        vector signed short shorts =
            (vector signed short) vec_mergeh(zero, bytes);

        /* One output row is 8 shorts, i.e. 16 bytes. */
        vec_st(shorts, i * 16, (vector signed short *) block);

        pixels += line_size;
    }
}
92
93 #endif /* HAVE_VSX */
94
95 #if HAVE_VSX
/**
 * Compute the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 16-bit values (variant built when HAVE_VSX is set).
 *
 * @param block  destination for 64 int16_t differences, written row by row
 * @param s1     top-left pixel of the minuend block; may be unaligned
 * @param s2     top-left pixel of the subtrahend block; may be unaligned
 * @param stride distance in bytes between two consecutive rows, applied to
 *               both s1 and s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    const vector unsigned char zeros =
        (const vector unsigned char) vec_splat_u8(0);
    int16_t *out = block;
    int row;

    for (row = 0; row < 8; row++) {
        /* Whole 16-byte vectors are fetched from each source although only
         * the first 8 pixels of a row are used. */
        vector unsigned char raw1 = vec_vsx_ld(0, s1);
        vector unsigned char raw2 = vec_vsx_ld(0, s2);

        /* Widen both rows to shorts by pairing each pixel byte with a
         * zero byte (pixel byte first). */
        vector signed short wide1 =
            (vector signed short) vec_mergeh(raw1, zeros);
        vector signed short wide2 =
            (vector signed short) vec_mergeh(raw2, zeros);

        /* Subtract and store one output row of 8 shorts. */
        vec_vsx_st(vec_sub(wide1, wide2), 0, (vector signed short *) out);

        s1  += stride;
        s2  += stride;
        out += 8;
    }
}
157 #else
/**
 * Fetch 8 pixels from a possibly unaligned address and widen them to shorts.
 *
 * The realignment permute is derived from the address passed in, so the
 * result is correct for any alignment of src. The second aligned load
 * targets offset 7, the last byte that is actually consumed, so no aligned
 * 16-byte block is touched unless it holds at least one wanted pixel.
 *
 * @param src  address of the first of the 8 pixels
 * @param zero all-zero vector used as the upper byte of every short
 * @return the 8 pixels as 16-bit elements (zero byte followed by pixel byte)
 */
static inline vector signed short diff_pixels_load_row(const uint8_t *src,
                                                       vector unsigned char zero)
{
    vector unsigned char perm  = vec_lvsl(0, src);
    vector unsigned char pixl  = vec_ld(0, src);
    vector unsigned char pixr  = vec_ld(7, src);
    vector unsigned char bytes = vec_perm(pixl, pixr, perm);

    return (vector signed short) vec_mergeh(zero, bytes);
}

/**
 * Compute the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 16-bit values (classic AltiVec variant).
 *
 * @param block  destination for 64 int16_t differences, written row by row;
 *               must be 16-byte aligned because it is stored with vec_st()
 * @param s1     top-left pixel of the minuend block, any alignment
 * @param s2     top-left pixel of the subtrahend block, any alignment
 * @param stride distance in bytes between two consecutive rows, applied to
 *               both s1 and s2; it does not have to be a multiple of 16
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* Row alignment can change from one row to the next, so each row is
         * realigned individually inside the helper. */
        vector signed short shorts1 = diff_pixels_load_row(s1, zero);
        vector signed short shorts2 = diff_pixels_load_row(s2, zero);

        /* Subtract and store one output row of 8 shorts. */
        vec_st(vec_sub(shorts1, shorts2), 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
229
230 #endif /* HAVE_VSX */
231
232 #endif /* HAVE_ALTIVEC */
233
234 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
235 AVCodecContext *avctx,
236 unsigned high_bit_depth)
237 {
238 #if HAVE_ALTIVEC
239 if (!PPC_ALTIVEC(av_get_cpu_flags()))
240 return;
241
242 c->diff_pixels = diff_pixels_altivec;
243
244 if (!high_bit_depth) {
245 c->get_pixels = get_pixels_altivec;
246 }
247 #endif /* HAVE_ALTIVEC */
248 }