1 | /* |
2 | * Copyright (c) 2002 Brian Foley | |
3 | * Copyright (c) 2002 Dieter Shirley | |
4 | * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | ||
25 | #if HAVE_ALTIVEC_H | |
26 | #include <altivec.h> | |
27 | #endif | |
28 | ||
29 | #include "libavutil/attributes.h" | |
30 | #include "libavutil/cpu.h" | |
31 | #include "libavutil/ppc/cpu.h" | |
32 | #include "libavutil/ppc/types_altivec.h" | |
33 | #include "libavutil/ppc/util_altivec.h" | |
34 | #include "libavcodec/hpeldsp.h" | |
35 | #include "hpeldsp_altivec.h" | |
36 | ||
37 | #if HAVE_ALTIVEC | |
38 | /* next one assumes that ((line_size % 16) == 0) */ | |
39 | void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |
40 | { | |
41 | register vector unsigned char pixelsv1, pixelsv2; | |
42 | register vector unsigned char pixelsv1B, pixelsv2B; | |
43 | register vector unsigned char pixelsv1C, pixelsv2C; | |
44 | register vector unsigned char pixelsv1D, pixelsv2D; | |
45 | ||
46 | register vector unsigned char perm = vec_lvsl(0, pixels); | |
47 | int i; | |
48 | register ptrdiff_t line_size_2 = line_size << 1; | |
49 | register ptrdiff_t line_size_3 = line_size + line_size_2; | |
50 | register ptrdiff_t line_size_4 = line_size << 2; | |
51 | ||
52 | // hand-unrolling the loop by 4 gains about 15% | |
53 | // mininum execution time goes from 74 to 60 cycles | |
54 | // it's faster than -funroll-loops, but using | |
55 | // -funroll-loops w/ this is bad - 74 cycles again. | |
56 | // all this is on a 7450, tuning for the 7450 | |
57 | for (i = 0; i < h; i += 4) { | |
58 | pixelsv1 = vec_ld( 0, pixels); | |
59 | pixelsv2 = vec_ld(15, pixels); | |
60 | pixelsv1B = vec_ld(line_size, pixels); | |
61 | pixelsv2B = vec_ld(15 + line_size, pixels); | |
62 | pixelsv1C = vec_ld(line_size_2, pixels); | |
63 | pixelsv2C = vec_ld(15 + line_size_2, pixels); | |
64 | pixelsv1D = vec_ld(line_size_3, pixels); | |
65 | pixelsv2D = vec_ld(15 + line_size_3, pixels); | |
66 | vec_st(vec_perm(pixelsv1, pixelsv2, perm), | |
67 | 0, (unsigned char*)block); | |
68 | vec_st(vec_perm(pixelsv1B, pixelsv2B, perm), | |
69 | line_size, (unsigned char*)block); | |
70 | vec_st(vec_perm(pixelsv1C, pixelsv2C, perm), | |
71 | line_size_2, (unsigned char*)block); | |
72 | vec_st(vec_perm(pixelsv1D, pixelsv2D, perm), | |
73 | line_size_3, (unsigned char*)block); | |
74 | pixels+=line_size_4; | |
75 | block +=line_size_4; | |
76 | } | |
77 | } | |
78 | ||
79 | /* next one assumes that ((line_size % 16) == 0) */ | |
80 | #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | |
81 | void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |
82 | { | |
83 | register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | |
84 | register vector unsigned char perm = vec_lvsl(0, pixels); | |
85 | int i; | |
86 | ||
87 | for (i = 0; i < h; i++) { | |
88 | pixelsv1 = vec_ld( 0, pixels); | |
89 | pixelsv2 = vec_ld(16,pixels); | |
90 | blockv = vec_ld(0, block); | |
91 | pixelsv = vec_perm(pixelsv1, pixelsv2, perm); | |
92 | blockv = vec_avg(blockv,pixelsv); | |
93 | vec_st(blockv, 0, (unsigned char*)block); | |
94 | pixels+=line_size; | |
95 | block +=line_size; | |
96 | } | |
97 | } | |
98 | ||
99 | /* next one assumes that ((line_size % 8) == 0) */ | |
100 | static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h) | |
101 | { | |
102 | register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | |
103 | int i; | |
104 | ||
105 | for (i = 0; i < h; i++) { | |
106 | /* block is 8 bytes-aligned, so we're either in the | |
107 | left block (16 bytes-aligned) or in the right block (not) */ | |
108 | int rightside = ((unsigned long)block & 0x0000000F); | |
109 | ||
110 | blockv = vec_ld(0, block); | |
111 | pixelsv1 = vec_ld( 0, pixels); | |
112 | pixelsv2 = vec_ld(16, pixels); | |
113 | pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); | |
114 | ||
115 | if (rightside) { | |
116 | pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); | |
117 | } else { | |
118 | pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); | |
119 | } | |
120 | ||
121 | blockv = vec_avg(blockv, pixelsv); | |
122 | ||
123 | vec_st(blockv, 0, block); | |
124 | ||
125 | pixels += line_size; | |
126 | block += line_size; | |
127 | } | |
128 | } | |
129 | ||
/* next one assumes that ((line_size % 8) == 0) */
/* 2-D half-pel interpolation with rounding over 8 columns:
 * dst[x] = (p[x] + p[x+1] + p[x+stride] + p[x+stride+1] + 2) >> 2.
 * The horizontal pair-sum of each row is computed once and carried across
 * iterations in pixelssum1, so each row of pixels is loaded only once. */
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime the pipeline: load row 0 at offsets 0 and 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* If pixels sits on the last byte of an aligned 16-byte block, pixels+1
     * is 16-byte aligned and temp2 already holds exactly those 16 bytes. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Widen the first 8 bytes to unsigned 16-bit lanes (zero-interleave). */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* pixelssum1 = horizontal pair-sum of row 0, plus the rounding bias 2. */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Load the next row (pixels + line_size) at offsets 0 and 1. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row_i sum + bias) + row_{i+1} sum, then >> 2. Max value is
         * 4*255 + 2 = 1022, so the u16 lanes cannot overflow. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* Carry this row's sum (re-biased) into the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        /* Pack the 8 results back to bytes (high half of the vector). */
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 result bytes into the correct half of the aligned
         * destination vector, preserving the other half. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
188 | ||
/* next one assumes that ((line_size % 8) == 0) */
/* Same 2x2 half-pel interpolation as put_pixels8_xy2_altivec, but with the
 * "no rounding" bias: dst[x] = (A + B + C + D + 1) >> 2 (vcone instead of
 * vctwo as the carried rounding constant). */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime the pipeline with row 0 at offsets 0 and 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels+1 is 16-byte aligned, temp2 is already that vector. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes to u16 lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* Row-0 horizontal pair-sum plus the no-round bias of 1. */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Load the next source row at offsets 0 and 1. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* Sum of the two row pair-sums (bias included), divided by 4;
         * vctwo doubles as the shift count here. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* Carry this row's sum forward with the bias of 1. */
        pixelssum1 = vec_add(pixelssum2, vcone);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Write the 8 result bytes into the proper half of the destination
         * vector, keeping the other half intact. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
248 | ||
/* next one assumes that ((line_size % 16) == 0) */
/* 16-column version of the 2x2 half-pel interpolation with rounding:
 * dst[x] = (p[x] + p[x+1] + p[x+stride] + p[x+stride+1] + 2) >> 2.
 * The 16 bytes are processed as two 8-lane u16 halves (mergeh = low 8
 * columns, mergel = high 8 columns); both halves carry their row sum
 * across iterations so each source row is loaded only once. */
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime with row 0 loaded at offsets 0 and 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels+1 is 16-byte aligned, temp2 is already that vector. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Split into high (v3/v4) and low (v1/v2) 8-byte halves, widened to u16. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* Row-0 horizontal pair-sums plus the rounding bias 2, per half. */
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vctwo);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        /* Load the next source row at offsets 0 and 1. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (previous row sum + bias) + current row sum, then >> 2;
         * max is 4*255 + 2 = 1022, so u16 lanes cannot overflow. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* Carry both halves' sums (re-biased) to the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vctwo);
        pixelssum1 = vec_add(pixelssum2, vctwo);

        /* Pack the two u16 halves back into 16 result bytes; the aligned
         * 16-byte store overwrites the whole destination row. */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
316 | ||
/* next one assumes that ((line_size % 16) == 0) */
/* Same as put_pixels16_xy2_altivec but with the "no rounding" bias:
 * dst[x] = (A + B + C + D + 1) >> 2 (vcone carried instead of vctwo). */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime with row 0 loaded at offsets 0 and 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels+1 is 16-byte aligned, temp2 is already that vector. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Split into high (v3/v4) and low (v1/v2) halves, widened to u16. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* Row-0 horizontal pair-sums plus the no-round bias of 1, per half. */
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vcone);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        /* Load the next source row at offsets 0 and 1. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (previous row sum + bias) + current row sum, then >> 2;
         * vctwo is only the shift count here, not the rounding bias. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* Carry both halves' sums forward with the bias of 1. */
        pixelssum3 = vec_add(pixelssum4, vcone);
        pixelssum1 = vec_add(pixelssum2, vcone);

        /* Pack both u16 halves back into 16 bytes and store the full row. */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
385 | ||
/* next one assumes that ((line_size % 8) == 0) */
/* Averaging variant of the 8-column 2x2 half-pel interpolation:
 * interp = (A + B + C + D + 2) >> 2, then dst = avg(dst, interp) with
 * vec_avg's rounded average. */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2, blocktemp;
    register vector unsigned short pixelssum1, pixelssum2, temp3;

    register const vector unsigned char vczero = (const vector unsigned char)
                                        vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)
                                        vec_splat_u16(2);

    /* Prime the pipeline with row 0 loaded at offsets 0 and 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels+1 is 16-byte aligned, temp2 is already that vector. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes to u16 lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    /* Row-0 horizontal pair-sum plus the rounding bias 2. */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* block is 8-byte aligned: left or right half of an aligned vector. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Load the next source row at offsets 0 and 1. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (previous row sum + bias) + current row sum, then >> 2. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* Carry this row's sum (re-biased) into the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Place the 8 interpolated bytes into the half of the vector that
         * corresponds to block; the other half keeps the original block
         * bytes, so vec_avg leaves it unchanged. */
        if (rightside) {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        blockv = vec_avg(blocktemp, blockv);
        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
448 | #endif /* HAVE_ALTIVEC */ | |
449 | ||
450 | av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags) | |
451 | { | |
452 | #if HAVE_ALTIVEC | |
453 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
454 | return; | |
455 | ||
456 | c->avg_pixels_tab[0][0] = ff_avg_pixels16_altivec; | |
457 | c->avg_pixels_tab[1][0] = avg_pixels8_altivec; | |
458 | c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; | |
459 | ||
460 | c->put_pixels_tab[0][0] = ff_put_pixels16_altivec; | |
461 | c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | |
462 | c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; | |
463 | ||
464 | c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_altivec; | |
465 | c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; | |
466 | c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; | |
467 | #endif /* HAVE_ALTIVEC */ | |
468 | } |