Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 | * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * H.264 / AVC / MPEG4 part10 prediction functions. | |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
28 | #include "libavutil/intreadwrite.h" | |
29 | ||
30 | #include "mathops.h" | |
31 | ||
32 | #include "bit_depth_template.c" | |
33 | ||
34 | static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, | |
35 | ptrdiff_t _stride) | |
36 | { | |
37 | pixel *src = (pixel*)_src; | |
38 | int stride = _stride>>(sizeof(pixel)-1); | |
39 | const pixel4 a= AV_RN4PA(src-stride); | |
40 | ||
41 | AV_WN4PA(src+0*stride, a); | |
42 | AV_WN4PA(src+1*stride, a); | |
43 | AV_WN4PA(src+2*stride, a); | |
44 | AV_WN4PA(src+3*stride, a); | |
45 | } | |
46 | ||
/* 4x4 horizontal prediction: replicate the left-neighbour pixel of each
 * row across that row. topright is unused. */
static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
                                      ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    /* byte stride -> pixel stride; assumes sizeof(pixel) is 1 or 2 */
    int stride = _stride>>(sizeof(pixel)-1);
    AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
    AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
    AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
    AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
}
57 | ||
/* 4x4 DC prediction: fill the block with the rounded average of the
 * 4 top and 4 left neighbour pixels. topright is unused. */
static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
                              ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    /* (sum of 8 neighbours + 4) / 8 */
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
72 | ||
/* 4x4 left-DC prediction: fill the block with the rounded average of the
 * 4 left neighbours only (used when the top row is unavailable). */
static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
86 | ||
/* 4x4 top-DC prediction: fill the block with the rounded average of the
 * 4 top neighbours only (used when the left column is unavailable). */
static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
                                  ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
100 | ||
101 | static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, | |
102 | ptrdiff_t _stride) | |
103 | { | |
104 | pixel *src = (pixel*)_src; | |
105 | int stride = _stride>>(sizeof(pixel)-1); | |
106 | const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); | |
107 | ||
108 | AV_WN4PA(src+0*stride, a); | |
109 | AV_WN4PA(src+1*stride, a); | |
110 | AV_WN4PA(src+2*stride, a); | |
111 | AV_WN4PA(src+3*stride, a); | |
112 | } | |
113 | ||
114 | static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, | |
115 | ptrdiff_t _stride) | |
116 | { | |
117 | pixel *src = (pixel*)_src; | |
118 | int stride = _stride>>(sizeof(pixel)-1); | |
119 | const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); | |
120 | ||
121 | AV_WN4PA(src+0*stride, a); | |
122 | AV_WN4PA(src+1*stride, a); | |
123 | AV_WN4PA(src+2*stride, a); | |
124 | AV_WN4PA(src+3*stride, a); | |
125 | } | |
126 | ||
127 | static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, | |
128 | ptrdiff_t _stride) | |
129 | { | |
130 | pixel *src = (pixel*)_src; | |
131 | int stride = _stride>>(sizeof(pixel)-1); | |
132 | const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); | |
133 | ||
134 | AV_WN4PA(src+0*stride, a); | |
135 | AV_WN4PA(src+1*stride, a); | |
136 | AV_WN4PA(src+2*stride, a); | |
137 | AV_WN4PA(src+3*stride, a); | |
138 | } | |
139 | ||
140 | ||
/* Neighbour-loading helpers for the directional 4x4 predictors below.
 * Each expands to const locals (t0..t7 above, l0..l7 to the left);
 * av_unused silences warnings when a predictor reads only some of them.
 * Values are loaded as unsigned so the arithmetic stays in unsigned int. */

/* t4..t7: the 4 pixels above-right of the block (from the topright pointer). */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];\

/* l4..l7: the 4 pixels below-left of the block. */
#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];\

/* l0..l3: the 4 pixels directly left of the block. */
#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];\

/* t0..t3: the 4 pixels directly above the block. */
#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];\

/* 4x4 diagonal down-right prediction (H.264 mode 4): each down-right
 * diagonal is filled with a 1-2-1 filtered value of the top/left/corner
 * neighbours. topright is unused. */
static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
                                      ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];      /* top-left corner neighbour */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    /* one filtered value per diagonal, written to every pixel on it */
    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
191 | ||
/* 4x4 diagonal down-left prediction (H.264 mode 3): diagonals are filled
 * from 1-2-1 filtered top and top-right neighbours; the bottom-right
 * corner uses (t6 + 3*t7 + 2) >> 2 as the spec's edge case. */
static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
                                     ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE
//    LOAD_LEFT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}
219 | ||
/* 4x4 vertical-right prediction (H.264 mode 5): even rows use 2-tap
 * half-pel averages of the top edge, odd rows use 1-2-1 filtered values;
 * the left column of rows 2-3 comes from the left edge. topright unused. */
static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
                                          const uint8_t *topright,
                                          ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];      /* top-left corner neighbour */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
247 | ||
/* 4x4 vertical-left prediction (H.264 mode 7): even rows use 2-tap
 * half-pel averages of the top/top-right edge, odd rows use 1-2-1
 * filtered values. */
static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
                                         const uint8_t *_topright,
                                         ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
275 | ||
/* 4x4 horizontal-up prediction (H.264 mode 8): interpolates upward along
 * the left edge; positions past the last left neighbour are clamped to l3
 * (hence the (l2 + 2*l3 + l3 + 2) term and the l3 fill at the end).
 * topright is unused. */
static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
                                         ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
    src[3+2*stride]=
    src[1+3*stride]=
    src[0+3*stride]=
    src[2+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
300 | ||
/* 4x4 horizontal-down prediction (H.264 mode 6): mixes 2-tap half-pel
 * averages down the left edge with 1-2-1 filtered corner/top values.
 * topright is unused. */
static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
                                           const uint8_t *topright,
                                           ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];      /* top-left corner neighbour */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
328 | ||
/* 16x16 vertical prediction: replicate the 16-pixel row above the block
 * into all 16 rows, 4 pixels (one pixel4) at a time. */
static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
    const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
    const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
        AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
        AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
    }
}
346 | ||
/* 16x16 horizontal prediction: splat each row's left neighbour across
 * the 16 pixels of that row. */
static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;   /* bytes -> pixels */

    for(i=0; i<16; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);

        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
    }
}
362 | ||
/* Fill a 16x16 block with the splatted pixel4 value v.
 * Expects `i`, `src` (pixel*) and a pixel-unit `stride` in scope;
 * advances src past the block. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }
371 | ||
/* 16x16 DC prediction: fill with the rounded average of the 16 left and
 * 16 top neighbours ((sum + 16) / 32). */
static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;   /* bytes -> pixels */

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];    /* left edge */
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];       /* top edge */
    }

    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
    PREDICT_16x16_DC(dcsplat);
}
390 | ||
/* 16x16 left-DC prediction: average of the 16 left neighbours only
 * (top row unavailable). */
static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
405 | ||
/* 16x16 top-DC prediction: average of the 16 top neighbours only
 * (left column unavailable). */
static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
420 | ||
/* Generate pred16x16_<n>_dc: fill a 16x16 block with the constant v.
 * Instantiated for mid-level-1 / mid-level / mid-level+1
 * (127/128/129 at 8-bit), used when neighbours are unavailable. */
#define PRED16x16_X(n, v) \
static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
    int i;\
    pixel *src = (pixel*)_src;\
    stride >>= sizeof(pixel)-1;\
    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
}

PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
433 | ||
/* 16x16 plane prediction, shared between H.264, SVQ3 and RV40.
 * Fits a linear gradient a + x*H + y*V through the top and left edges,
 * then writes the clipped plane. svq3/rv40 select those codecs' variant
 * scaling of the H/V gradients (exact statement order matters for
 * bit-exactness — do not "simplify" the divisions). */
static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
                                                 ptrdiff_t _stride,
                                                 const int svq3,
                                                 const int rv40)
{
    int i, j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +7-stride;    /* middle of the top edge */
    const pixel *       src1 = src +8*stride-1;  /* middle of the left edge */
    const pixel *       src2 = src1-2*stride;    /* == src+6*stride-1; */
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    /* weighted sums of symmetric edge differences (k = distance weight) */
    for(k=2; k<=8; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    if(svq3){
        /* SVQ3 uses truncating division, not shifts */
        H = ( 5*(H/4) ) / 16;
        V = ( 5*(V/4) ) / 16;

        /* required for 100% accuracy */
        i = H; H = V; V = i;
    }else if(rv40){
        H = ( H + (H>>2) ) >> 4;
        V = ( V + (V>>2) ) >> 4;
    }else{
        H = ( 5*H+32 ) >> 6;
        V = ( 5*V+32 ) >> 6;
    }

    /* plane value at the top-left corner, biased for rounding by >>5 */
    a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
    for(j=16; j>0; --j) {
        int b = a;
        a += V;
        for(i=-16; i<0; i+=4) {
            src[16+i] = CLIP((b    ) >> 5);
            src[17+i] = CLIP((b+  H) >> 5);
            src[18+i] = CLIP((b+2*H) >> 5);
            src[19+i] = CLIP((b+3*H) >> 5);
            b += 4*H;
        }
        src += stride;
    }
}
482 | ||
/* Plain H.264 16x16 plane prediction (no SVQ3/RV40 variants). */
static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
}
487 | ||
/* 8x8 vertical prediction: replicate the 8-pixel row above the block
 * into all 8 rows. */
static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

    for(i=0; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
    }
}
501 | ||
/* 8x16 vertical prediction (4:2:2 chroma): replicate the 8-pixel top row
 * into all 16 rows. */
static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
    }
}
515 | ||
/* 8x8 horizontal prediction: splat each row's left neighbour across
 * the 8 pixels of that row. */
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    for(i=0; i<8; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
    }
}
528 | ||
/* 8x16 horizontal prediction (4:2:2 chroma): splat each row's left
 * neighbour across the 8 pixels of that row, for 16 rows. */
static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;
    for(i=0; i<16; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
    }
}
540 | ||
/* Generate pred8x8_<n>_dc: fill an 8x8 block with the constant v.
 * Instantiated for mid-level-1 / mid-level / mid-level+1
 * (127/128/129 at 8-bit). */
#define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
    int i;\
    const pixel4 a = PIXEL_SPLAT_X4(v);\
    pixel *src = (pixel*)_src;\
    stride >>= sizeof(pixel)-1;\
    for(i=0; i<8; i++){\
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
    }\
}

PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
557 | ||
/* 8x16 constant-DC fill: two stacked 8x8 fills. stride is still in
 * bytes here, so 8*stride addresses row 8 regardless of pixel size. */
static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
{
    FUNCC(pred8x8_128_dc)(_src, stride);
    FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
}
563 | ||
/* 8x8 left-DC prediction: the top half is filled with the average of
 * left neighbours 0-3, the bottom half with the average of left
 * neighbours 4-7 (top row unavailable). */
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc2;
    pixel4 dc0splat, dc2splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
    }
}
589 | ||
/* 8x16 left-DC prediction: two stacked 8x8 left-DC fills
 * (stride is in bytes here, so 8*stride is row 8). */
static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
    FUNCC(pred8x8_left_dc)(_src, stride);
    FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
}
595 | ||
/* 8x8 top-DC prediction: the left half is filled with the average of
 * top neighbours 0-3, the right half with the average of top
 * neighbours 4-7 (left column unavailable). */
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
}
621 | ||
/* 8x16 top-DC prediction: left/right 4-pixel top averages extended down
 * all 16 rows (left column unavailable). */
static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
}
643 | ||
/* 8x8 DC prediction: the block is split into four 4x4 quadrants.
 * Top-left uses left0-3 + top0-3, top-right uses top4-7, bottom-left
 * uses left4-7, bottom-right averages the latter two (per H.264 chroma
 * DC rules). */
static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1, dc2;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];  /* left0-3 + top0-3 */
        dc1+= src[4+i-stride];                   /* top4-7 */
        dc2+= src[-1+(i+4)*stride];              /* left4-7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
    }
}
672 | ||
/* 8x16 DC prediction (4:2:2 chroma): eight 4x4 quadrants. The left
 * column quadrants use the corresponding 4 left neighbours (the top one
 * also adds top0-3); each right column quadrant averages top4-7 with
 * that row band's left neighbours. */
static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1, dc2, dc3, dc4;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=dc2=dc3=dc4=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];  /* left0-3 + top0-3 */
        dc1+= src[4+i-stride];                   /* top4-7 */
        dc2+= src[-1+(i+4)*stride];              /* left4-7 */
        dc3+= src[-1+(i+8)*stride];              /* left8-11 */
        dc4+= src[-1+(i+12)*stride];             /* left12-15 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
    dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
    dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
    dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
    dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
    }
    for(i=8; i<12; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
    }
    for(i=12; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
    }
}
715 | ||
/* The following "mad cow" composites intentionally overwrite part of a
 * full 8x8/8x16 DC prediction with a 4x4 DC variant; the call order is
 * the behavior — do not optimize or reorder them. */
//the following 4 function should not be optimized!
/* left+top DC everywhere except the top-left 4x4, which gets full DC. */
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x8_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
722 | ||
/* 8x16 variant: top-DC prediction, then the top-left 4x4 is overwritten
 * with full 4x4 DC. Call order is the behavior — do not reorder. */
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x16_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
728 | ||
/* 8x8 full DC prediction, then the top-left 4x4 is overwritten with
 * top-only 4x4 DC. Call order is the behavior — do not reorder. */
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x8_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
734 | ||
/* 8x16 full DC prediction, then the top-left 4x4 is overwritten with
 * top-only 4x4 DC. Call order is the behavior — do not reorder. */
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x16_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
740 | ||
/* 8x8 left-DC prediction, then the bottom row of 4x4 blocks is
 * overwritten with the mid-level constant. Note stride is in bytes here,
 * hence the 4*sizeof(pixel) column offset. Do not reorder. */
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
747 | ||
/* 8x16 left-DC prediction, then the second row of 4x4 blocks is
 * overwritten with the mid-level constant. Do not reorder. */
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
754 | ||
/* 8x8 left-DC prediction, then the top row of 4x4 blocks is overwritten
 * with the mid-level constant. Do not reorder. */
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
761 | ||
/* 8x16 left-DC prediction, then the top row of 4x4 blocks is overwritten
 * with the mid-level constant. Do not reorder. */
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
768 | ||
/* 8x8 plane prediction: fit a linear gradient through the top and left
 * edges (weighted symmetric differences around the edge midpoints), then
 * write the clipped plane row by row. */
static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
{
    int j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +3-stride;    /* middle of the top edge */
    const pixel *       src1 = src +4*stride-1;  /* middle of the left edge */
    const pixel *       src2 = src1-2*stride;    /* == src+2*stride-1; */
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    for(k=2; k<=4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    H = ( 17*H+16 ) >> 5;
    V = ( 17*V+16 ) >> 5;

    /* plane value at the top-left corner, biased for rounding by >>5 */
    a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
    for(j=8; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}
804 | ||
/* 8x16 plane prediction (4:2:2 chroma): horizontal gradient from the
 * 8-pixel top edge (k up to 4), vertical gradient from the 16-pixel left
 * edge (k up to 8, continued in the second loop), then the clipped plane
 * is written over 16 rows. */
static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
{
    int j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +3-stride;    /* middle of the top edge */
    const pixel *       src1 = src +8*stride-1;  /* middle of the left edge */
    const pixel *       src2 = src1-2*stride;    /* == src+6*stride-1; */
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];

    for (k = 2; k <= 4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    /* the left edge is twice as tall as the top edge is wide:
     * keep accumulating V only */
    for (; k <= 8; ++k) {
        src1 += stride; src2 -= stride;
        V += k*(src1[0] - src2[0]);
    }

    H = (17*H+16) >> 5;
    V = (5*V+32) >> 6;

    a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
    for(j=16; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}
846 | ||
/* Helpers for the 8x8 luma (pred8x8l_*) predictors below. These
 * predictors first low-pass filter the neighbour edges per H.264
 * 8x8 intra rules; has_topleft/has_topright select the fallback
 * sample when a corner/edge is unavailable. */

/* Pixel at (x,y) relative to the block's top-left corner. */
#define SRC(x,y) src[(x)+(y)*stride]

/* Filtered left-edge sample at row y (1-2-1 tap along the column). */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Define l0..l7: filtered left edge; l0 substitutes the left sample for
 * the missing corner, l7 uses a 1-3 tap at the bottom end. */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* Filtered top-edge sample at column x. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Define t0..t7: filtered top edge with corner/top-right fallbacks. */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* Filtered top-right sample at column x (non-const: also used for the
 * replicate-last-sample fallback below). */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Define t8..t15: filtered top-right edge, or replicate SRC(7,-1) when
 * the top-right neighbours are unavailable. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* Filtered top-left corner sample. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fill the 8x8 block with the splatted pixel4 value v; advances src. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
884 | ||
/* 8x8 luma constant-DC prediction: fill with the mid-level value
 * (no neighbours available). has_topleft/has_topright are unused. */
static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
                                   int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);

    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
}
/* 8x8 luma left-DC prediction: average of the 8 filtered left-edge
 * samples (top row unavailable). */
static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
                                    int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);

    PREDICT_8x8_LOAD_LEFT;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
/**
 * 8x8 luma DC prediction from the top neighbours only:
 * average of the filtered top row t0..t7, rounded.
 */
static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
                                   int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
/**
 * 8x8 luma DC prediction from both left and top neighbours:
 * average of the 16 filtered samples l0..l7 and t0..t7, rounded.
 */
static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
                               int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
    PREDICT_8x8_DC(dc);
}
/**
 * 8x8 luma horizontal prediction: each row y is filled with the filtered
 * left neighbour l<y>, written as two splatted 4-pixel stores.
 */
static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
                                       int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    pixel4 a;

    PREDICT_8x8_LOAD_LEFT;
    /* l##y token-pastes the per-row constant declared by PREDICT_8x8_LOAD_LEFT */
#define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
               AV_WN4PA(src+y*stride, a); \
               AV_WN4PA(src+y*stride+4, a);
    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
/**
 * 8x8 luma vertical prediction: write the filtered top row t0..t7 into
 * row 0, then copy that row (re-read as two pixel4 words) into rows 1..7.
 */
static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
                                     int has_topright, ptrdiff_t _stride)
{
    int y;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    pixel4 a, b;

    PREDICT_8x8_LOAD_TOP;
    /* materialize the filtered top row in row 0 of the destination */
    src[0] = t0;
    src[1] = t1;
    src[2] = t2;
    src[3] = t3;
    src[4] = t4;
    src[5] = t5;
    src[6] = t6;
    src[7] = t7;
    /* read row 0 back as two packed words and replicate them downwards */
    a = AV_RN4PA(((pixel4*)src)+0);
    b = AV_RN4PA(((pixel4*)src)+1);
    for( y = 1; y < 8; y++ ) {
        AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
    }
}
/**
 * 8x8 luma diagonal down-left prediction: every anti-diagonal (constant
 * x+y) is filled with one [1 2 1]-filtered value from the top / top-right
 * neighbours t0..t15. Table is a literal transcription of the spec.
 */
static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
                                      int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2; /* bottom-right corner: 2-tap edge case */
}
/**
 * 8x8 luma diagonal down-right prediction: every diagonal (constant x-y)
 * is filled with one [1 2 1]-filtered value, walking from the bottom of
 * the left column through the corner `lt` to the end of the top row.
 */
static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
                                       int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
/**
 * 8x8 luma vertical-right prediction: columns shifted right by one every
 * two rows. Even-offset positions use 2-tap half-pel averages of adjacent
 * top samples; odd-offset positions use [1 2 1] filtered values; the
 * leftmost column below the diagonal comes from the left neighbours.
 */
static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
                                           int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
}
/**
 * 8x8 luma horizontal-down prediction: rows shifted down by one every two
 * columns. Even-offset positions are 2-tap averages of adjacent left
 * samples; odd-offset positions are [1 2 1] filtered; the top rows right
 * of the diagonal come from the top neighbours through `lt`.
 */
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
                                            int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
/**
 * 8x8 luma vertical-left prediction: even rows take 2-tap half-pel
 * averages of adjacent top samples, odd rows take [1 2 1] filtered
 * values, with the sample window advancing one column every two rows
 * into the top-right neighbours t8..t12.
 */
static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
                                          int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
/**
 * 8x8 luma horizontal-up prediction from the left neighbours only:
 * even columns are 2-tap averages of adjacent left samples, odd columns
 * are [1 2 1] filtered; everything past the last left sample is padded
 * with l7 (the bottom-right triangle of the block).
 */
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
                                          int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_LEFT;
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    /* below the last usable sample: replicate l7 */
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}
1127 | ||
1128 | static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, | |
1129 | int has_topright, ptrdiff_t _stride) | |
1130 | { | |
1131 | int i; | |
1132 | pixel *src = (pixel*)_src; | |
1133 | const dctcoef *block = (const dctcoef*)_block; | |
1134 | pixel pix[8]; | |
1135 | int stride = _stride>>(sizeof(pixel)-1); | |
1136 | PREDICT_8x8_LOAD_TOP; | |
1137 | ||
1138 | pix[0] = t0; | |
1139 | pix[1] = t1; | |
1140 | pix[2] = t2; | |
1141 | pix[3] = t3; | |
1142 | pix[4] = t4; | |
1143 | pix[5] = t5; | |
1144 | pix[6] = t6; | |
1145 | pix[7] = t7; | |
1146 | ||
1147 | for(i=0; i<8; i++){ | |
1148 | pixel v = pix[i]; | |
1149 | src[0*stride]= v += block[0]; | |
1150 | src[1*stride]= v += block[8]; | |
1151 | src[2*stride]= v += block[16]; | |
1152 | src[3*stride]= v += block[24]; | |
1153 | src[4*stride]= v += block[32]; | |
1154 | src[5*stride]= v += block[40]; | |
1155 | src[6*stride]= v += block[48]; | |
1156 | src[7*stride]= v + block[56]; | |
1157 | src++; | |
1158 | block++; | |
1159 | } | |
1160 | ||
1161 | memset(_block, 0, sizeof(dctcoef) * 64); | |
1162 | } | |
1163 | ||
1164 | static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, | |
1165 | int has_topright, ptrdiff_t _stride) | |
1166 | { | |
1167 | int i; | |
1168 | pixel *src = (pixel*)_src; | |
1169 | const dctcoef *block = (const dctcoef*)_block; | |
1170 | pixel pix[8]; | |
1171 | int stride = _stride>>(sizeof(pixel)-1); | |
1172 | PREDICT_8x8_LOAD_LEFT; | |
1173 | ||
1174 | pix[0] = l0; | |
1175 | pix[1] = l1; | |
1176 | pix[2] = l2; | |
1177 | pix[3] = l3; | |
1178 | pix[4] = l4; | |
1179 | pix[5] = l5; | |
1180 | pix[6] = l6; | |
1181 | pix[7] = l7; | |
1182 | ||
1183 | for(i=0; i<8; i++){ | |
1184 | pixel v = pix[i]; | |
1185 | src[0]= v += block[0]; | |
1186 | src[1]= v += block[1]; | |
1187 | src[2]= v += block[2]; | |
1188 | src[3]= v += block[3]; | |
1189 | src[4]= v += block[4]; | |
1190 | src[5]= v += block[5]; | |
1191 | src[6]= v += block[6]; | |
1192 | src[7]= v + block[7]; | |
1193 | src+= stride; | |
1194 | block+= 8; | |
1195 | } | |
1196 | ||
1197 | memset(_block, 0, sizeof(dctcoef) * 64); | |
1198 | } | |
1199 | ||
1200 | #undef PREDICT_8x8_LOAD_LEFT | |
1201 | #undef PREDICT_8x8_LOAD_TOP | |
1202 | #undef PREDICT_8x8_LOAD_TOPLEFT | |
1203 | #undef PREDICT_8x8_LOAD_TOPRIGHT | |
1204 | #undef PREDICT_8x8_DC | |
1205 | #undef PTR | |
1206 | #undef PT | |
1207 | #undef PL | |
1208 | #undef SRC | |
1209 | ||
1210 | static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block, | |
1211 | ptrdiff_t stride) | |
1212 | { | |
1213 | int i; | |
1214 | pixel *pix = (pixel*)_pix; | |
1215 | const dctcoef *block = (const dctcoef*)_block; | |
1216 | stride >>= sizeof(pixel)-1; | |
1217 | pix -= stride; | |
1218 | for(i=0; i<4; i++){ | |
1219 | pixel v = pix[0]; | |
1220 | pix[1*stride]= v += block[0]; | |
1221 | pix[2*stride]= v += block[4]; | |
1222 | pix[3*stride]= v += block[8]; | |
1223 | pix[4*stride]= v + block[12]; | |
1224 | pix++; | |
1225 | block++; | |
1226 | } | |
1227 | ||
1228 | memset(_block, 0, sizeof(dctcoef) * 16); | |
1229 | } | |
1230 | ||
1231 | static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block, | |
1232 | ptrdiff_t stride) | |
1233 | { | |
1234 | int i; | |
1235 | pixel *pix = (pixel*)_pix; | |
1236 | const dctcoef *block = (const dctcoef*)_block; | |
1237 | stride >>= sizeof(pixel)-1; | |
1238 | for(i=0; i<4; i++){ | |
1239 | pixel v = pix[-1]; | |
1240 | pix[0]= v += block[0]; | |
1241 | pix[1]= v += block[1]; | |
1242 | pix[2]= v += block[2]; | |
1243 | pix[3]= v + block[3]; | |
1244 | pix+= stride; | |
1245 | block+= 4; | |
1246 | } | |
1247 | ||
1248 | memset(_block, 0, sizeof(dctcoef) * 16); | |
1249 | } | |
1250 | ||
1251 | static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block, | |
1252 | ptrdiff_t stride) | |
1253 | { | |
1254 | int i; | |
1255 | pixel *pix = (pixel*)_pix; | |
1256 | const dctcoef *block = (const dctcoef*)_block; | |
1257 | stride >>= sizeof(pixel)-1; | |
1258 | pix -= stride; | |
1259 | for(i=0; i<8; i++){ | |
1260 | pixel v = pix[0]; | |
1261 | pix[1*stride]= v += block[0]; | |
1262 | pix[2*stride]= v += block[8]; | |
1263 | pix[3*stride]= v += block[16]; | |
1264 | pix[4*stride]= v += block[24]; | |
1265 | pix[5*stride]= v += block[32]; | |
1266 | pix[6*stride]= v += block[40]; | |
1267 | pix[7*stride]= v += block[48]; | |
1268 | pix[8*stride]= v + block[56]; | |
1269 | pix++; | |
1270 | block++; | |
1271 | } | |
1272 | ||
1273 | memset(_block, 0, sizeof(dctcoef) * 64); | |
1274 | } | |
1275 | ||
1276 | static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block, | |
1277 | ptrdiff_t stride) | |
1278 | { | |
1279 | int i; | |
1280 | pixel *pix = (pixel*)_pix; | |
1281 | const dctcoef *block = (const dctcoef*)_block; | |
1282 | stride >>= sizeof(pixel)-1; | |
1283 | for(i=0; i<8; i++){ | |
1284 | pixel v = pix[-1]; | |
1285 | pix[0]= v += block[0]; | |
1286 | pix[1]= v += block[1]; | |
1287 | pix[2]= v += block[2]; | |
1288 | pix[3]= v += block[3]; | |
1289 | pix[4]= v += block[4]; | |
1290 | pix[5]= v += block[5]; | |
1291 | pix[6]= v += block[6]; | |
1292 | pix[7]= v + block[7]; | |
1293 | pix+= stride; | |
1294 | block+= 8; | |
1295 | } | |
1296 | ||
1297 | memset(_block, 0, sizeof(dctcoef) * 64); | |
1298 | } | |
1299 | ||
1300 | static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, | |
1301 | int16_t *block, | |
1302 | ptrdiff_t stride) | |
1303 | { | |
1304 | int i; | |
1305 | for(i=0; i<16; i++) | |
1306 | FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1307 | } | |
1308 | ||
1309 | static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, | |
1310 | const int *block_offset, | |
1311 | int16_t *block, | |
1312 | ptrdiff_t stride) | |
1313 | { | |
1314 | int i; | |
1315 | for(i=0; i<16; i++) | |
1316 | FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1317 | } | |
1318 | ||
1319 | static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, | |
1320 | int16_t *block, ptrdiff_t stride) | |
1321 | { | |
1322 | int i; | |
1323 | for(i=0; i<4; i++) | |
1324 | FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1325 | } | |
1326 | ||
1327 | static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, | |
1328 | int16_t *block, ptrdiff_t stride) | |
1329 | { | |
1330 | int i; | |
1331 | for(i=0; i<4; i++) | |
1332 | FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1333 | for(i=4; i<8; i++) | |
1334 | FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |
1335 | } | |
1336 | ||
1337 | static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, | |
1338 | int16_t *block, | |
1339 | ptrdiff_t stride) | |
1340 | { | |
1341 | int i; | |
1342 | for(i=0; i<4; i++) | |
1343 | FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1344 | } | |
1345 | ||
1346 | static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, | |
1347 | const int *block_offset, | |
1348 | int16_t *block, ptrdiff_t stride) | |
1349 | { | |
1350 | int i; | |
1351 | for(i=0; i<4; i++) | |
1352 | FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |
1353 | for(i=4; i<8; i++) | |
1354 | FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |
1355 | } |