Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2002 Brian Foley | |
3 | * Copyright (c) 2002 Dieter Shirley | |
4 | * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | #if HAVE_ALTIVEC_H | |
25 | #include <altivec.h> | |
26 | #endif | |
27 | ||
28 | #include "libavutil/attributes.h" | |
29 | #include "libavutil/cpu.h" | |
30 | #include "libavutil/ppc/cpu.h" | |
31 | #include "libavutil/ppc/types_altivec.h" | |
32 | #include "libavutil/ppc/util_altivec.h" | |
33 | #include "libavcodec/avcodec.h" | |
34 | #include "libavcodec/pixblockdsp.h" | |
35 | ||
36 | #if HAVE_ALTIVEC | |
37 | ||
f6fa7814 DM |
38 | #if HAVE_VSX |
/**
 * Copy an 8x8 block of 8-bit pixels into a block of 16-bit values
 * (variant built when HAVE_VSX is set).
 *
 * @param block     destination for 8 rows of 8 widened samples; row r is
 *                  stored 16 * r bytes past the start of block
 * @param pixels    first pixel of the top source row
 * @param line_size distance in bytes from one source row to the next
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    /* vec_perm selector: entries 0x00..0x07 pick the first eight bytes of the
     * first operand, entries 0x10..0x17 pick bytes of the second operand.
     * Alternating them pairs every pixel with one byte of the second operand.
     * NOTE(review): the pixel byte comes first in each pair, which looks
     * intended for a little-endian target -- confirm for big-endian VSX. */
    vector unsigned char interleave =
        (vector unsigned char) { 0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
                                 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17 };
    const vector unsigned char zeros =
        (const vector unsigned char) vec_splat_u8(0);
    vector signed short *out = (vector signed short *) block;
    int row;

    for (row = 0; row < 8; row++, pixels += line_size) {
        /* The source row may be unaligned. A full 16 bytes are fetched even
         * though only the first 8 are used; the rest is ignored. */
        const vector unsigned char src = vec_vsx_ld(0, pixels);

        /* Widen each of the 8 pixels to 16 bits by pairing it with a zero. */
        const vector signed short widened =
            (vector signed short) vec_perm(src, zeros, interleave);

        /* One 16-byte output row; block is assumed to be 16-byte aligned. */
        vec_vsx_st(widened, row * 16, out);
    }
}
65 | #else | |
2ba45a60 DM |
/**
 * Copy an 8x8 block of 8-bit pixels into a block of 16-bit values
 * (plain AltiVec variant, used when HAVE_VSX is not set).
 *
 * @param block     destination for 8 rows of 8 widened samples; assumed to be
 *                  16-byte aligned because it is written with vec_st
 * @param pixels    first pixel of the top source row; any alignment
 * @param line_size distance in bytes from one source row to the next; it does
 *                  not need to be a multiple of 16
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* The alignment permute depends on the low four address bits of the
         * current row. It must be recomputed per row: when line_size is not
         * a multiple of 16 every row has a different misalignment, and a
         * permute computed once before the loop would select wrong bytes. */
        vector unsigned char perm = vec_lvsl(0, pixels);

        /* Read potentially unaligned pixels.
         * Only 8 pixels are wanted, so the second aligned load targets the
         * 16-byte line containing pixels[7]; it never touches a line that
         * holds none of the wanted bytes. Extra bytes are simply ignored. */
        vector unsigned char pixl  = vec_ld(0, pixels);
        vector unsigned char pixr  = vec_ld(7, pixels);
        vector unsigned char bytes = vec_perm(pixl, pixr, perm);

        // Convert the bytes into shorts by pairing each pixel with a zero byte.
        vector signed short shorts = (vector signed short) vec_mergeh(zero,
                                                                      bytes);

        // Save the data to the block, we assume the block is 16-byte aligned.
        vec_st(shorts, i * 16, (vector signed short *) block);

        pixels += line_size;
    }
}
92 | ||
f6fa7814 DM |
93 | #endif /* HAVE_VSX */ |
94 | ||
95 | #if HAVE_VSX | |
/**
 * Store the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 16-bit values (variant built when HAVE_VSX is set).
 *
 * @param block  destination for 8 rows of 8 differences, rows back to back
 * @param s1     top-left pixel of the minuend block
 * @param s2     top-left pixel of the subtrahend block
 * @param stride distance in bytes between consecutive rows, used for both
 *               s1 and s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    const vector unsigned char zeros =
        (const vector unsigned char) vec_splat_u8(0);
    int row;

    for (row = 0; row < 8; row++) {
        /* Both source rows may be unaligned. 16 bytes are fetched from each
         * although only the first 8 are used; the rest is ignored. */
        const vector unsigned char row1 = vec_vsx_ld(0, s1);
        const vector unsigned char row2 = vec_vsx_ld(0, s2);

        /* Widen the first 8 pixels of each row to 16 bits by merging them
         * with zero bytes. */
        const vector signed short wide1 =
            (vector signed short) vec_mergeh(row1, zeros);
        const vector signed short wide2 =
            (vector signed short) vec_mergeh(row2, zeros);

        /* Write one row of differences; block is assumed 16-byte aligned. */
        vec_vsx_st(vec_sub(wide1, wide2), 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
157 | #else | |
2ba45a60 DM |
/**
 * Load 8 pixels from an arbitrarily aligned address and widen them to 16 bits.
 *
 * The alignment permute is derived from src on every call, so the result is
 * correct whatever the misalignment of the individual row. The second aligned
 * load targets the 16-byte line containing src[7], the last byte that is
 * actually needed, so no line without wanted bytes is ever read.
 */
static inline vector signed short load_row_widened_altivec(const uint8_t *src)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    const vector unsigned char perm  = vec_lvsl(0, src);
    const vector unsigned char pixl  = vec_ld(0, src);
    const vector unsigned char pixr  = vec_ld(7, src);
    const vector unsigned char bytes = vec_perm(pixl, pixr, perm);

    // Convert the bytes into shorts by pairing each pixel with a zero byte.
    return (vector signed short) vec_mergeh(zero, bytes);
}

/**
 * Store the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 16-bit values (plain AltiVec variant, used when HAVE_VSX is
 * not set).
 *
 * @param block  destination for 8 rows of 8 differences, rows back to back;
 *               assumed to be 16-byte aligned because it is written with
 *               vec_st
 * @param s1     top-left pixel of the minuend block; any alignment
 * @param s2     top-left pixel of the subtrahend block; any alignment
 * @param stride distance in bytes between consecutive rows, used for both
 *               s1 and s2; it does not need to be a multiple of 16
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    int i;

    for (i = 0; i < 8; i++) {
        /* Each row is loaded with its own alignment permute: with a stride
         * that is not a multiple of 16 the misalignment differs from row to
         * row, so a permute computed once before the loop would be wrong. */
        const vector signed short shorts1 = load_row_widened_altivec(s1);
        const vector signed short shorts2 = load_row_widened_altivec(s2);

        // Do the subtraction and save the row to the block.
        vec_st(vec_sub(shorts1, shorts2), 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
229 | ||
f6fa7814 DM |
230 | #endif /* HAVE_VSX */ |
231 | ||
2ba45a60 DM |
232 | #endif /* HAVE_ALTIVEC */ |
233 | ||
234 | av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, | |
235 | AVCodecContext *avctx, | |
236 | unsigned high_bit_depth) | |
237 | { | |
238 | #if HAVE_ALTIVEC | |
239 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
240 | return; | |
241 | ||
242 | c->diff_pixels = diff_pixels_altivec; | |
243 | ||
244 | if (!high_bit_depth) { | |
245 | c->get_pixels = get_pixels_altivec; | |
246 | } | |
247 | #endif /* HAVE_ALTIVEC */ | |
248 | } |