2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
/**
 * Widen an 8x8 block of 8-bit pixels into 64 signed 16-bit values.
 *
 * @param block     destination, 8 values per row, rows stored back to back;
 *                  must be 16-byte aligned because it is written with vec_st()
 * @param pixels    first pixel of the top row; need not be 16-byte aligned
 * @param line_size distance in bytes between the starts of consecutive rows
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               int line_size)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    int i;

    for (i = 0; i < 8; i++) {
        /* The realignment mask is derived from the low four address bits of
         * the row being read. It has to be rebuilt for every row: line_size
         * is not required to be a multiple of 16, so the misalignment can
         * differ from one row to the next. */
        vector unsigned char perm = vec_lvsl(0, pixels);

        /* Read potentially unaligned pixels.
         * We're reading 16 pixels, and actually only want 8,
         * but we simply ignore the extras. Offset 7 addresses the last
         * pixel we need, so the second load never reaches past it. */
        vector unsigned char pixl  = vec_ld(0, pixels);
        vector unsigned char pixr  = vec_ld(7, pixels);
        vector unsigned char bytes = vec_perm(pixl, pixr, perm);

        // Convert the bytes into shorts by interleaving them with zeros.
        vector signed short shorts =
            (vector signed short) vec_mergeh(zero, bytes);

        // Save the data to the block, we assume the block is 16-byte aligned.
        vec_st(shorts, i * 16, (vector signed short *) block);

        pixels += line_size;
    }
}
/* Load eight 8-bit pixels from a possibly unaligned address and zero-extend
 * them into eight 16-bit lanes. */
static inline vector signed short diff_pixels_load_row(const uint8_t *src)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    /* The realignment mask follows the low four bits of this particular
     * address, so it is computed here rather than once per block. */
    vector unsigned char perm = vec_lvsl(0, src);

    /* Read potentially unaligned pixels.
     * We're reading 16 pixels, and actually only want 8,
     * but we simply ignore the extras. Offset 7 addresses the last pixel
     * we need; a larger offset could pull in a further 16-byte line that
     * contributes nothing to the result. */
    vector unsigned char pixl  = vec_ld(0, src);
    vector unsigned char pixr  = vec_ld(7, src);
    vector unsigned char bytes = vec_perm(pixl, pixr, perm);

    // Convert the bytes into shorts.
    return (vector signed short) vec_mergeh(zero, bytes);
}

/**
 * Compute the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels and store it as 64 signed 16-bit values.
 *
 * @param block  destination, 8 values per row, rows stored back to back;
 *               must be 16-byte aligned because it is written with vec_st()
 * @param s1     first pixel of the top row of the minuend block
 * @param s2     first pixel of the top row of the subtrahend block
 * @param stride distance in bytes between consecutive rows, shared by
 *               s1 and s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    int i;

    for (i = 0; i < 8; i++) {
        vector signed short shorts1 = diff_pixels_load_row(s1);
        vector signed short shorts2 = diff_pixels_load_row(s2);

        // Do the subtraction.
        shorts1 = vec_sub(shorts1, shorts2);

        // Save the data to the block, we assume the block is 16-byte aligned.
        vec_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
137 #endif /* HAVE_ALTIVEC */
139 av_cold
void ff_pixblockdsp_init_ppc(PixblockDSPContext
*c
,
140 AVCodecContext
*avctx
,
141 unsigned high_bit_depth
)
144 if (!PPC_ALTIVEC(av_get_cpu_flags()))
147 c
->diff_pixels
= diff_pixels_altivec
;
149 if (!high_bit_depth
) {
150 c
->get_pixels
= get_pixels_altivec
;
152 #endif /* HAVE_ALTIVEC */