/*
 * VC-1 and WMV3 decoder - DSP functions
 * Copyright (c) 2006 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * VC-1 and WMV3 decoder
 */
#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "h264chroma.h"
#include "qpeldsp.h"
#include "rnd_avg.h"
#include "vc1dsp.h"
#include "startcode.h"
/* Apply overlap transform to horizontal edge */
static void vc1_v_overlap_c(uint8_t *src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for (i = 0; i < 8; i++) {
        a  = src[-2 * stride];
        b  = src[-stride];
        c  = src[0];
        d  = src[stride];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2 * stride] = a - d1;
        src[-stride]     = av_clip_uint8(b - d2);
        src[0]           = av_clip_uint8(c + d2);
        src[stride]      = d + d1;

        src++;
        rnd = !rnd; /* alternate rounding direction along the edge */
    }
}
/* Apply overlap transform to vertical edge */
static void vc1_h_overlap_c(uint8_t *src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for (i = 0; i < 8; i++) {
        a  = src[-2];
        b  = src[-1];
        c  = src[0];
        d  = src[1];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2] = a - d1;
        src[-1] = av_clip_uint8(b - d2);
        src[0]  = av_clip_uint8(c + d2);
        src[1]  = d + d1;

        src += stride;
        rnd = !rnd;
    }
}
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd1 = 4, rnd2 = 3;
    for (i = 0; i < 8; i++) {
        a  = top[48];
        b  = top[56];
        c  = bottom[0];
        d  = bottom[8];
        d1 = a - d;
        d2 = a - d + b - c;

        top[48]   = ((a << 3) - d1 + rnd1) >> 3;
        top[56]   = ((b << 3) - d2 + rnd2) >> 3;
        bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
        bottom[8] = ((d << 3) + d1 + rnd2) >> 3;

        bottom++;
        top++;
        rnd2 = 7 - rnd2;
        rnd1 = 7 - rnd1;
    }
}
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd1 = 4, rnd2 = 3;
    for (i = 0; i < 8; i++) {
        a  = left[6];
        b  = left[7];
        c  = right[0];
        d  = right[1];
        d1 = a - d;
        d2 = a - d + b - c;

        left[6]  = ((a << 3) - d1 + rnd1) >> 3;
        left[7]  = ((b << 3) - d2 + rnd2) >> 3;
        right[0] = ((c << 3) + d2 + rnd1) >> 3;
        right[1] = ((d << 3) + d1 + rnd2) >> 3;

        left  += 8;
        right += 8;
        rnd2   = 7 - rnd2;
        rnd1   = 7 - rnd1;
    }
}
/**
 * VC-1 in-loop deblocking filter for one line
 * @param src pointer to the first pixel past the filtered edge on this line
 * @param stride distance between the pixels across the edge
 * @param pq block quantizer
 * @return whether the other 3 lines of the 4-line segment should be filtered too
 */
static av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq)
{
    int a0      = (2 * (src[-2 * stride] - src[1 * stride]) -
                   5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
    int a0_sign = a0 >> 31;        /* Store sign */

    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
    if (a0 < pq) {
        int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
                        5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
        int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
                        5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
        if (a1 < a0 || a2 < a0) {
            int clip      = src[-1 * stride] - src[0 * stride];
            int clip_sign = clip >> 31;

            clip = ((clip ^ clip_sign) - clip_sign) >> 1;
            if (clip) {
                int a3     = FFMIN(a1, a2);
                int d      = 5 * (a3 - a0);
                int d_sign = (d >> 31);

                d       = ((d ^ d_sign) - d_sign) >> 3;
                d_sign ^= a0_sign;

                if (d_sign ^ clip_sign)
                    d = 0;
                else {
                    d = FFMIN(d, clip);
                    d = (d ^ d_sign) - d_sign; /* Restore sign */
                    src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
                    src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
                }
                return 1;
            }
        }
    }
    return 0;
}
/**
 * VC-1 in-loop deblocking filter
 * @param src pointer to the first pixel past the filtered block edge
 * @param step distance between horizontally adjacent elements
 * @param stride distance between vertically adjacent elements
 * @param len edge length to filter (4 or 8 pixels)
 * @param pq block quantizer
 */
static inline void vc1_loop_filter(uint8_t *src, int step, int stride,
                                   int len, int pq)
{
    int i;
    int filt3;

    for (i = 0; i < len; i += 4) {
        /* The filter decision is taken on the 3rd line of each segment. */
        filt3 = vc1_filter_line(src + 2 * step, stride, pq);
        if (filt3) {
            vc1_filter_line(src + 0 * step, stride, pq);
            vc1_filter_line(src + 1 * step, stride, pq);
            vc1_filter_line(src + 3 * step, stride, pq);
        }
        src += step * 4;
    }
}
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 4, pq);
}

static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 4, pq);
}

static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 8, pq);
}

static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 8, pq);
}

static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 16, pq);
}

static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 16, pq);
}
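
/*
 * Inverse transforms. VC-1 uses integer transforms with coefficient sets
 * {12, 16, 15, 9, 6, 4} for the 8-point case and {17, 22, 10} for the
 * 4-point case. Each 2-D transform below is a first pass scaled by >> 3
 * followed by a second pass over the other dimension scaled by >> 7.
 * The _dc variants cover the common case where only the DC coefficient is
 * non-zero, which collapses the whole transform into adding one constant
 * to every pixel of the block.
 */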
/* Do inverse transform on 8x8 block: DC-only case */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    for (i = 0; i < 8; i++) {
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest[4] = av_clip_uint8(dest[4] + dc);
        dest[5] = av_clip_uint8(dest[5] + dc);
        dest[6] = av_clip_uint8(dest[6] + dc);
        dest[7] = av_clip_uint8(dest[7] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int i;
    register int t1, t2, t3, t4, t5, t6, t7, t8;
    int16_t *src, *dst, temp[64];

    src = block;
    dst = temp;
    for (i = 0; i < 8; i++) {
        t1 = 12 * (src[ 0] + src[32]) + 4;
        t2 = 12 * (src[ 0] - src[32]) + 4;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 1;
        dst += 8;
    }

    src = temp;
    dst = block;
    for (i = 0; i < 8; i++) {
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dst[ 0] = (t5 + t1) >> 7;
        dst[ 8] = (t6 + t2) >> 7;
        dst[16] = (t7 + t3) >> 7;
        dst[24] = (t8 + t4) >> 7;
        dst[32] = (t8 - t4 + 1) >> 7;
        dst[40] = (t7 - t3 + 1) >> 7;
        dst[48] = (t6 - t2 + 1) >> 7;
        dst[56] = (t5 - t1 + 1) >> 7;

        src++;
        dst++;
    }
}
/* Do inverse transform on 8x4 part of block: DC-only case */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];

    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    for (i = 0; i < 4; i++) {
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest[4] = av_clip_uint8(dest[4] + dc);
        dest[5] = av_clip_uint8(dest[5] + dc);
        dest[6] = av_clip_uint8(dest[6] + dc);
        dest[7] = av_clip_uint8(dest[7] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1, t2, t3, t4, t5, t6, t7, t8;
    int16_t *src, *dst;

    src = block;
    dst = block;

    for (i = 0; i < 4; i++) {
        t1 = 12 * (src[0] + src[4]) + 4;
        t2 = 12 * (src[0] - src[4]) + 4;
        t3 = 16 * src[2] +  6 * src[6];
        t4 =  6 * src[2] - 16 * src[6];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[1] + 15 * src[3] +  9 * src[5] +  4 * src[7];
        t2 = 15 * src[1] -  4 * src[3] - 16 * src[5] -  9 * src[7];
        t3 =  9 * src[1] - 16 * src[3] +  4 * src[5] + 15 * src[7];
        t4 =  4 * src[1] -  9 * src[3] + 15 * src[5] - 16 * src[7];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for (i = 0; i < 8; i++) {
        t1 = 17 * (src[ 0] + src[16]) + 64;
        t2 = 17 * (src[ 0] - src[16]) + 64;
        t3 = 22 * src[ 8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[ 8];

        dest[0 * linesize] = av_clip_uint8(dest[0 * linesize] + ((t1 + t3) >> 7));
        dest[1 * linesize] = av_clip_uint8(dest[1 * linesize] + ((t2 - t4) >> 7));
        dest[2 * linesize] = av_clip_uint8(dest[2 * linesize] + ((t2 + t4) >> 7));
        dest[3 * linesize] = av_clip_uint8(dest[3 * linesize] + ((t1 - t3) >> 7));

        src++;
        dest++;
    }
}
/* Do inverse transform on 4x8 parts of block: DC-only case */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    for (i = 0; i < 8; i++) {
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1, t2, t3, t4, t5, t6, t7, t8;
    int16_t *src, *dst;

    src = block;
    dst = block;

    for (i = 0; i < 8; i++) {
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for (i = 0; i < 4; i++) {
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dest[0 * linesize] = av_clip_uint8(dest[0 * linesize] + ((t5 + t1) >> 7));
        dest[1 * linesize] = av_clip_uint8(dest[1 * linesize] + ((t6 + t2) >> 7));
        dest[2 * linesize] = av_clip_uint8(dest[2 * linesize] + ((t7 + t3) >> 7));
        dest[3 * linesize] = av_clip_uint8(dest[3 * linesize] + ((t8 + t4) >> 7));
        dest[4 * linesize] = av_clip_uint8(dest[4 * linesize] + ((t8 - t4 + 1) >> 7));
        dest[5 * linesize] = av_clip_uint8(dest[5 * linesize] + ((t7 - t3 + 1) >> 7));
        dest[6 * linesize] = av_clip_uint8(dest[6 * linesize] + ((t6 - t2 + 1) >> 7));
        dest[7 * linesize] = av_clip_uint8(dest[7 * linesize] + ((t5 - t1 + 1) >> 7));

        src++;
        dest++;
    }
}
/* Do inverse transform on 4x4 part of block: DC-only case */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;

    for (i = 0; i < 4; i++) {
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1, t2, t3, t4;
    int16_t *src, *dst;

    src = block;
    dst = block;
    for (i = 0; i < 4; i++) {
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for (i = 0; i < 4; i++) {
        t1 = 17 * (src[0] + src[16]) + 64;
        t2 = 17 * (src[0] - src[16]) + 64;
        t3 = 22 * src[8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[8];

        dest[0 * linesize] = av_clip_uint8(dest[0 * linesize] + ((t1 + t3) >> 7));
        dest[1 * linesize] = av_clip_uint8(dest[1 * linesize] + ((t2 - t4) >> 7));
        dest[2 * linesize] = av_clip_uint8(dest[2 * linesize] + ((t2 + t4) >> 7));
        dest[3 * linesize] = av_clip_uint8(dest[3 * linesize] + ((t1 - t3) >> 7));

        src++;
        dest++;
    }
}
/* motion compensation functions */
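
/*
 * Luma motion compensation uses quarter-pel bicubic ("mspel") filters.
 * The phase selected by mode 1..3 corresponds to a 1/4, 1/2 or 3/4 sample
 * shift with taps {-4, 53, 18, -3} / 64, {-1, 9, 9, -1} / 16 and the
 * mirrored {-3, 18, 53, -4} / 64 respectively; mode 0 means no shift in
 * that direction.
 */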
/* Filter in case of 2 filters */
#define VC1_MSPEL_FILTER_16B(DIR, TYPE)                                       \
static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \
                                                                int stride,   \
                                                                int mode)     \
{                                                                             \
    switch (mode) {                                                           \
    case 0: /* no shift - should not occur */                                 \
        return 0;                                                             \
    case 1: /* 1/4 shift */                                                   \
        return -4 * src[-stride] + 53 * src[0] +                              \
               18 * src[stride]  -  3 * src[stride * 2];                      \
    case 2: /* 1/2 shift */                                                   \
        return -1 * src[-stride] +  9 * src[0] +                              \
                9 * src[stride]  -  1 * src[stride * 2];                      \
    case 3: /* 3/4 shift */                                                   \
        return -3 * src[-stride] + 18 * src[0] +                              \
               53 * src[stride]  -  4 * src[stride * 2];                      \
    }                                                                         \
    return 0; /* should not occur */                                          \
}

VC1_MSPEL_FILTER_16B(ver, uint8_t)
VC1_MSPEL_FILTER_16B(hor, int16_t)
/* Filter used to interpolate fractional pel values */
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
                                             int mode, int r)
{
    switch (mode) {
    case 0: /* no shift - should not occur */
        return src[0];
    case 1: /* 1/4 shift */
        return (-4 * src[-stride] + 53 * src[0] +
                18 * src[stride]  -  3 * src[stride * 2] + 32 - r) >> 6;
    case 2: /* 1/2 shift */
        return (-1 * src[-stride] +  9 * src[0] +
                 9 * src[stride]  -  1 * src[stride * 2] +  8 - r) >> 4;
    case 3: /* 3/4 shift */
        return (-3 * src[-stride] + 18 * src[0] +
                53 * src[stride]  -  4 * src[stride * 2] + 32 - r) >> 6;
    }
    return 0; // should not occur
}
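
/*
 * When both a horizontal and a vertical shift are required, the vertical
 * pass runs first into a 16-bit temporary (11 columns x 8 rows, or 19 x 16
 * for the 16x16 variant: 3 extra columns feed the 4-tap horizontal filter).
 * The intermediate is pre-shifted by shift_value[] so it stays within
 * 16 bits, and the final horizontal pass restores unity gain with its >> 7.
 */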
/* Function used to do motion compensation with bicubic interpolation */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                    const uint8_t *src,       \
                                                    ptrdiff_t stride,         \
                                                    int hmode,                \
                                                    int vmode,                \
                                                    int rnd)                  \
{                                                                             \
    int i, j;                                                                 \
                                                                              \
    if (vmode) { /* Vertical filter to apply */                               \
        int r;                                                                \
                                                                              \
        if (hmode) { /* Horizontal filter to apply too, output to tmp */      \
            static const int shift_value[] = { 0, 5, 1, 5 };                  \
            int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;       \
            int16_t tmp[11 * 8], *tptr = tmp;                                 \
                                                                              \
            r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
            src -= 1;                                                         \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 11; i++)                                      \
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
                src  += stride;                                               \
                tptr += 11;                                                   \
            }                                                                 \
                                                                              \
            r    = 64 - rnd;                                                  \
            tptr = tmp + 1;                                                   \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 8; i++)                                       \
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
                dst  += stride;                                               \
                tptr += 11;                                                   \
            }                                                                 \
                                                                              \
            return;                                                           \
        } else { /* No horizontal filter, output 8 lines to dst */            \
            r = 1 - rnd;                                                      \
                                                                              \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 8; i++)                                       \
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));  \
                src += stride;                                                \
                dst += stride;                                                \
            }                                                                 \
            return;                                                           \
        }                                                                     \
    }                                                                         \
                                                                              \
    /* Horizontal mode with no vertical mode */                               \
    for (j = 0; j < 8; j++) {                                                 \
        for (i = 0; i < 8; i++)                                               \
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));             \
        dst += stride;                                                        \
        src += stride;                                                        \
    }                                                                         \
}                                                                             \
static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd)               \
{                                                                             \
    int i, j;                                                                 \
                                                                              \
    if (vmode) { /* Vertical filter to apply */                               \
        int r;                                                                \
                                                                              \
        if (hmode) { /* Horizontal filter to apply too, output to tmp */      \
            static const int shift_value[] = { 0, 5, 1, 5 };                  \
            int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;       \
            int16_t tmp[19 * 16], *tptr = tmp;                                \
                                                                              \
            r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
            src -= 1;                                                         \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 19; i++)                                      \
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
                src  += stride;                                               \
                tptr += 19;                                                   \
            }                                                                 \
                                                                              \
            r    = 64 - rnd;                                                  \
            tptr = tmp + 1;                                                   \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 16; i++)                                      \
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
                dst  += stride;                                               \
                tptr += 19;                                                   \
            }                                                                 \
                                                                              \
            return;                                                           \
        } else { /* No horizontal filter, output 16 lines to dst */           \
            r = 1 - rnd;                                                      \
                                                                              \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 16; i++)                                      \
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));  \
                src += stride;                                                \
                dst += stride;                                                \
            }                                                                 \
            return;                                                           \
        }                                                                     \
    }                                                                         \
                                                                              \
    /* Horizontal mode with no vertical mode */                               \
    for (j = 0; j < 16; j++) {                                                \
        for (i = 0; i < 16; i++)                                              \
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));             \
        dst += stride;                                                        \
        src += stride;                                                        \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels,      \
                                  ptrdiff_t line_size, int rnd)               \
{                                                                             \
    int i;                                                                    \
                                                                              \
    for (i = 0; i < 8; i++) {                                                 \
        OP4(*(uint32_t *)(block),     AV_RN32(pixels));                       \
        OP4(*(uint32_t *)(block + 4), AV_RN32(pixels + 4));                   \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels,    \
                                    ptrdiff_t line_size, int rnd)             \
{                                                                             \
    int i;                                                                    \
                                                                              \
    for (i = 0; i < 16; i++) {                                                \
        OP4(*(uint32_t *)(block),      AV_RN32(pixels));                      \
        OP4(*(uint32_t *)(block +  4), AV_RN32(pixels +  4));                 \
        OP4(*(uint32_t *)(block +  8), AV_RN32(pixels +  8));                 \
        OP4(*(uint32_t *)(block + 12), AV_RN32(pixels + 12));                 \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}
#define op_put(a, b)  a = av_clip_uint8(b)
#define op_avg(a, b)  a = (a + av_clip_uint8(b) + 1) >> 1
#define op4_avg(a, b) a = rnd_avg32(a, b)
#define op4_put(a, b) a = b
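
/* op_put/op_avg store one clipped pixel; op4_put/op4_avg handle the
 * unfiltered (0,0) case four bytes at a time, with rnd_avg32() providing
 * a packed rounding average. */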
VC1_MSPEL_MC(op_put, op4_put, put_)
VC1_MSPEL_MC(op_avg, op4_avg, avg_)
/* pixel functions - really are entry points to vc1_mspel_mc */

#define PUT_VC1_MSPEL(a, b)                                                   \
static void put_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                            \
}                                                                             \
static void avg_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                            \
}                                                                             \
static void put_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                         \
}                                                                             \
static void avg_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                         \
}

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
#define chroma_mc(a) \
    ((A * src[a] + B * src[a + 1] + \
      C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
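
/*
 * Chroma motion compensation is bilinear with 1/8-pel precision: the
 * weights A..D always sum to 64, and the "32 - 4" bias (rather than a
 * plain rounding bias of 32) gives the biased-down rounding that makes
 * these the _no_rnd_ variants.
 */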
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =     (x) * (8 - y);
    const int C = (8 - x) *     (y);
    const int D =     (x) *     (y);
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (i = 0; i < h; i++) {
        dst[0] = chroma_mc(0);
        dst[1] = chroma_mc(1);
        dst[2] = chroma_mc(2);
        dst[3] = chroma_mc(3);
        dst[4] = chroma_mc(4);
        dst[5] = chroma_mc(5);
        dst[6] = chroma_mc(6);
        dst[7] = chroma_mc(7);
        dst += stride;
        src += stride;
    }
}
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =     (x) * (8 - y);
    const int C = (8 - x) *     (y);
    const int D =     (x) *     (y);
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (i = 0; i < h; i++) {
        dst[0] = chroma_mc(0);
        dst[1] = chroma_mc(1);
        dst[2] = chroma_mc(2);
        dst[3] = chroma_mc(3);
        dst += stride;
        src += stride;
    }
}
#define avg2(a, b) (((a) + (b) + 1) >> 1)
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =     (x) * (8 - y);
    const int C = (8 - x) *     (y);
    const int D =     (x) *     (y);
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (i = 0; i < h; i++) {
        dst[0] = avg2(dst[0], chroma_mc(0));
        dst[1] = avg2(dst[1], chroma_mc(1));
        dst[2] = avg2(dst[2], chroma_mc(2));
        dst[3] = avg2(dst[3], chroma_mc(3));
        dst[4] = avg2(dst[4], chroma_mc(4));
        dst[5] = avg2(dst[5], chroma_mc(5));
        dst[6] = avg2(dst[6], chroma_mc(6));
        dst[7] = avg2(dst[7], chroma_mc(7));
        dst += stride;
        src += stride;
    }
}
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =     (x) * (8 - y);
    const int C = (8 - x) *     (y);
    const int D =     (x) *     (y);
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (i = 0; i < h; i++) {
        dst[0] = avg2(dst[0], chroma_mc(0));
        dst[1] = avg2(dst[1], chroma_mc(1));
        dst[2] = avg2(dst[2], chroma_mc(2));
        dst[3] = avg2(dst[3], chroma_mc(3));
        dst += stride;
        src += stride;
    }
}
#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
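
/*
 * Sprite (WMV3/VC-1 image) scaling helpers. Offsets and the blend factor
 * alpha are 16.16 fixed point: sprite_h_c() interpolates horizontally
 * between the two source pixels around each offset, while
 * sprite_v_template() interpolates vertically between two source lines
 * and, optionally, blends two sprites together.
 */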
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    while (count--) {
        int a = src[(offset >> 16)];
        int b = src[(offset >> 16) + 1];
        *dst++ = a + ((b - a) * (offset & 0xFFFF) >> 16);
        offset += advance;
    }
}
static av_always_inline void sprite_v_template(uint8_t *dst,
                                               const uint8_t *src1a,
                                               const uint8_t *src1b,
                                               int offset1,
                                               int two_sprites,
                                               const uint8_t *src2a,
                                               const uint8_t *src2b,
                                               int offset2,
                                               int alpha, int scaled,
                                               int width)
{
    int a1, b1, a2, b2;

    while (width--) {
        a1 = *src1a++;
        if (scaled) {
            b1 = *src1b++;
            a1 = a1 + ((b1 - a1) * offset1 >> 16);
        }
        if (two_sprites) {
            a2 = *src2a++;
            if (scaled == 2) {
                b2 = *src2b++;
                a2 = a2 + ((b2 - a2) * offset2 >> 16);
            }
            a1 = a1 + ((a2 - a1) * alpha >> 16);
        }
        *dst++ = a1;
    }
}
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    sprite_v_template(dst, src1a, src1b, offset, 0, NULL, NULL, 0, 0, 1, width);
}
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    sprite_v_template(dst, src1a, NULL, 0, 1, src2a, NULL, 0, alpha, 0, width);
}
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    sprite_v_template(dst, src1a, src1b, offset1, 1, src2a, NULL, 0, alpha, 1,
                      width);
}
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha, int width)
{
    sprite_v_template(dst, src1a, src1b, offset1, 1, src2a, src2b, offset2,
                      alpha, 2, width);
}

#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
#define FN_ASSIGN(X, Y) \
    dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = put_vc1_mspel_mc##X##Y##_c; \
    dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = put_vc1_mspel_mc##X##Y##_16_c; \
    dsp->avg_vc1_mspel_pixels_tab[1][X+4*Y] = avg_vc1_mspel_mc##X##Y##_c; \
    dsp->avg_vc1_mspel_pixels_tab[0][X+4*Y] = avg_vc1_mspel_mc##X##Y##_16_c
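
/*
 * The mspel tables are indexed as [size][X + 4 * Y], where index 0 holds
 * the 16x16 functions, index 1 the 8x8 ones, and (X, Y) is the quarter-pel
 * shift; e.g. a 16x16 block with fractional motion (2, 1) resolves to
 * put_vc1_mspel_pixels_tab[0][2 + 4 * 1]. Slot (0, 0) needs no filtering
 * and is filled with the straight copy/average functions in
 * ff_vc1dsp_init() below.
 */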
av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
{
    dsp->vc1_inv_trans_8x8    = vc1_inv_trans_8x8_c;
    dsp->vc1_inv_trans_4x8    = vc1_inv_trans_4x8_c;
    dsp->vc1_inv_trans_8x4    = vc1_inv_trans_8x4_c;
    dsp->vc1_inv_trans_4x4    = vc1_inv_trans_4x4_c;
    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;

    dsp->vc1_h_overlap   = vc1_h_overlap_c;
    dsp->vc1_v_overlap   = vc1_v_overlap_c;
    dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
    dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;

    dsp->vc1_v_loop_filter4  = vc1_v_loop_filter4_c;
    dsp->vc1_h_loop_filter4  = vc1_h_loop_filter4_c;
    dsp->vc1_v_loop_filter8  = vc1_v_loop_filter8_c;
    dsp->vc1_h_loop_filter8  = vc1_h_loop_filter8_c;
    dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c;
    dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c;

    dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
    dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
    dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
    dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;

    FN_ASSIGN(0, 1);
    FN_ASSIGN(0, 2);
    FN_ASSIGN(0, 3);

    FN_ASSIGN(1, 0);
    FN_ASSIGN(1, 1);
    FN_ASSIGN(1, 2);
    FN_ASSIGN(1, 3);

    FN_ASSIGN(2, 0);
    FN_ASSIGN(2, 1);
    FN_ASSIGN(2, 2);
    FN_ASSIGN(2, 3);

    FN_ASSIGN(3, 0);
    FN_ASSIGN(3, 1);
    FN_ASSIGN(3, 2);
    FN_ASSIGN(3, 3);
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = avg_no_rnd_vc1_chroma_mc4_c;
#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
    dsp->sprite_h                 = sprite_h_c;
    dsp->sprite_v_single          = sprite_v_single_c;
    dsp->sprite_v_double_noscale  = sprite_v_double_noscale_c;
    dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
    dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
    dsp->startcode_find_candidate = ff_startcode_find_candidate_c;

    if (ARCH_AARCH64)
        ff_vc1dsp_init_aarch64(dsp);
    if (ARCH_ARM)
        ff_vc1dsp_init_arm(dsp);
    if (ARCH_PPC)
        ff_vc1dsp_init_ppc(dsp);
    if (ARCH_X86)
        ff_vc1dsp_init_x86(dsp);
}