2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
48 static void free_buffers(VP8Context
*s
)
52 for (i
= 0; i
< MAX_THREADS
; i
++) {
54 pthread_cond_destroy(&s
->thread_data
[i
].cond
);
55 pthread_mutex_destroy(&s
->thread_data
[i
].lock
);
57 av_freep(&s
->thread_data
[i
].filter_strength
);
59 av_freep(&s
->thread_data
);
60 av_freep(&s
->macroblocks_base
);
61 av_freep(&s
->intra4x4_pred_mode_top
);
62 av_freep(&s
->top_nnz
);
63 av_freep(&s
->top_border
);
65 s
->macroblocks
= NULL
;
68 static int vp8_alloc_frame(VP8Context
*s
, VP8Frame
*f
, int ref
)
71 if ((ret
= ff_thread_get_buffer(s
->avctx
, &f
->tf
,
72 ref
? AV_GET_BUFFER_FLAG_REF
: 0)) < 0)
74 if (!(f
->seg_map
= av_buffer_allocz(s
->mb_width
* s
->mb_height
))) {
75 ff_thread_release_buffer(s
->avctx
, &f
->tf
);
76 return AVERROR(ENOMEM
);
81 static void vp8_release_frame(VP8Context
*s
, VP8Frame
*f
)
83 av_buffer_unref(&f
->seg_map
);
84 ff_thread_release_buffer(s
->avctx
, &f
->tf
);
#if CONFIG_VP8_DECODER
/**
 * Make dst a new reference to src (frame buffer + seg_map).
 * dst is released first; on failure dst is left released.
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
106 static void vp8_decode_flush_impl(AVCodecContext
*avctx
, int free_mem
)
108 VP8Context
*s
= avctx
->priv_data
;
111 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++)
112 vp8_release_frame(s
, &s
->frames
[i
]);
113 memset(s
->framep
, 0, sizeof(s
->framep
));
119 static void vp8_decode_flush(AVCodecContext
*avctx
)
121 vp8_decode_flush_impl(avctx
, 0);
124 static VP8Frame
*vp8_find_free_buffer(VP8Context
*s
)
126 VP8Frame
*frame
= NULL
;
129 // find a free buffer
130 for (i
= 0; i
< 5; i
++)
131 if (&s
->frames
[i
] != s
->framep
[VP56_FRAME_CURRENT
] &&
132 &s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
133 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
134 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
]) {
135 frame
= &s
->frames
[i
];
139 av_log(s
->avctx
, AV_LOG_FATAL
, "Ran out of free frames!\n");
142 if (frame
->tf
.f
->data
[0])
143 vp8_release_frame(s
, frame
);
148 static av_always_inline
149 int update_dimensions(VP8Context
*s
, int width
, int height
, int is_vp7
)
151 AVCodecContext
*avctx
= s
->avctx
;
154 if (width
!= s
->avctx
->width
|| ((width
+15)/16 != s
->mb_width
|| (height
+15)/16 != s
->mb_height
) && s
->macroblocks_base
||
155 height
!= s
->avctx
->height
) {
156 vp8_decode_flush_impl(s
->avctx
, 1);
158 ret
= ff_set_dimensions(s
->avctx
, width
, height
);
163 s
->mb_width
= (s
->avctx
->coded_width
+ 15) / 16;
164 s
->mb_height
= (s
->avctx
->coded_height
+ 15) / 16;
166 s
->mb_layout
= is_vp7
|| avctx
->active_thread_type
== FF_THREAD_SLICE
&&
167 FFMIN(s
->num_coeff_partitions
, avctx
->thread_count
) > 1;
168 if (!s
->mb_layout
) { // Frame threading and one thread
169 s
->macroblocks_base
= av_mallocz((s
->mb_width
+ s
->mb_height
* 2 + 1) *
170 sizeof(*s
->macroblocks
));
171 s
->intra4x4_pred_mode_top
= av_mallocz(s
->mb_width
* 4);
172 } else // Sliced threading
173 s
->macroblocks_base
= av_mallocz((s
->mb_width
+ 2) * (s
->mb_height
+ 2) *
174 sizeof(*s
->macroblocks
));
175 s
->top_nnz
= av_mallocz(s
->mb_width
* sizeof(*s
->top_nnz
));
176 s
->top_border
= av_mallocz((s
->mb_width
+ 1) * sizeof(*s
->top_border
));
177 s
->thread_data
= av_mallocz(MAX_THREADS
* sizeof(VP8ThreadData
));
179 for (i
= 0; i
< MAX_THREADS
; i
++) {
180 s
->thread_data
[i
].filter_strength
=
181 av_mallocz(s
->mb_width
* sizeof(*s
->thread_data
[0].filter_strength
));
183 pthread_mutex_init(&s
->thread_data
[i
].lock
, NULL
);
184 pthread_cond_init(&s
->thread_data
[i
].cond
, NULL
);
188 if (!s
->macroblocks_base
|| !s
->top_nnz
|| !s
->top_border
||
189 (!s
->intra4x4_pred_mode_top
&& !s
->mb_layout
))
190 return AVERROR(ENOMEM
);
192 s
->macroblocks
= s
->macroblocks_base
+ 1;
197 static int vp7_update_dimensions(VP8Context
*s
, int width
, int height
)
199 return update_dimensions(s
, width
, height
, IS_VP7
);
202 static int vp8_update_dimensions(VP8Context
*s
, int width
, int height
)
204 return update_dimensions(s
, width
, height
, IS_VP8
);
208 static void parse_segment_info(VP8Context
*s
)
210 VP56RangeCoder
*c
= &s
->c
;
213 s
->segmentation
.update_map
= vp8_rac_get(c
);
215 if (vp8_rac_get(c
)) { // update segment feature data
216 s
->segmentation
.absolute_vals
= vp8_rac_get(c
);
218 for (i
= 0; i
< 4; i
++)
219 s
->segmentation
.base_quant
[i
] = vp8_rac_get_sint(c
, 7);
221 for (i
= 0; i
< 4; i
++)
222 s
->segmentation
.filter_level
[i
] = vp8_rac_get_sint(c
, 6);
224 if (s
->segmentation
.update_map
)
225 for (i
= 0; i
< 3; i
++)
226 s
->prob
->segmentid
[i
] = vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 8) : 255;
229 static void update_lf_deltas(VP8Context
*s
)
231 VP56RangeCoder
*c
= &s
->c
;
234 for (i
= 0; i
< 4; i
++) {
235 if (vp8_rac_get(c
)) {
236 s
->lf_delta
.ref
[i
] = vp8_rac_get_uint(c
, 6);
239 s
->lf_delta
.ref
[i
] = -s
->lf_delta
.ref
[i
];
243 for (i
= MODE_I4x4
; i
<= VP8_MVMODE_SPLIT
; i
++) {
244 if (vp8_rac_get(c
)) {
245 s
->lf_delta
.mode
[i
] = vp8_rac_get_uint(c
, 6);
248 s
->lf_delta
.mode
[i
] = -s
->lf_delta
.mode
[i
];
253 static int setup_partitions(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
255 const uint8_t *sizes
= buf
;
258 s
->num_coeff_partitions
= 1 << vp8_rac_get_uint(&s
->c
, 2);
260 buf
+= 3 * (s
->num_coeff_partitions
- 1);
261 buf_size
-= 3 * (s
->num_coeff_partitions
- 1);
265 for (i
= 0; i
< s
->num_coeff_partitions
- 1; i
++) {
266 int size
= AV_RL24(sizes
+ 3 * i
);
267 if (buf_size
- size
< 0)
270 ff_vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, size
);
274 ff_vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, buf_size
);
279 static void vp7_get_quants(VP8Context
*s
)
281 VP56RangeCoder
*c
= &s
->c
;
283 int yac_qi
= vp8_rac_get_uint(c
, 7);
284 int ydc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
285 int y2dc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
286 int y2ac_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
287 int uvdc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
288 int uvac_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
290 s
->qmat
[0].luma_qmul
[0] = vp7_ydc_qlookup
[ydc_qi
];
291 s
->qmat
[0].luma_qmul
[1] = vp7_yac_qlookup
[yac_qi
];
292 s
->qmat
[0].luma_dc_qmul
[0] = vp7_y2dc_qlookup
[y2dc_qi
];
293 s
->qmat
[0].luma_dc_qmul
[1] = vp7_y2ac_qlookup
[y2ac_qi
];
294 s
->qmat
[0].chroma_qmul
[0] = FFMIN(vp7_ydc_qlookup
[uvdc_qi
], 132);
295 s
->qmat
[0].chroma_qmul
[1] = vp7_yac_qlookup
[uvac_qi
];
298 static void vp8_get_quants(VP8Context
*s
)
300 VP56RangeCoder
*c
= &s
->c
;
303 int yac_qi
= vp8_rac_get_uint(c
, 7);
304 int ydc_delta
= vp8_rac_get_sint(c
, 4);
305 int y2dc_delta
= vp8_rac_get_sint(c
, 4);
306 int y2ac_delta
= vp8_rac_get_sint(c
, 4);
307 int uvdc_delta
= vp8_rac_get_sint(c
, 4);
308 int uvac_delta
= vp8_rac_get_sint(c
, 4);
310 for (i
= 0; i
< 4; i
++) {
311 if (s
->segmentation
.enabled
) {
312 base_qi
= s
->segmentation
.base_quant
[i
];
313 if (!s
->segmentation
.absolute_vals
)
318 s
->qmat
[i
].luma_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ ydc_delta
, 7)];
319 s
->qmat
[i
].luma_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
, 7)];
320 s
->qmat
[i
].luma_dc_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ y2dc_delta
, 7)] * 2;
321 /* 101581>>16 is equivalent to 155/100 */
322 s
->qmat
[i
].luma_dc_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
+ y2ac_delta
, 7)] * 101581 >> 16;
323 s
->qmat
[i
].chroma_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ uvdc_delta
, 7)];
324 s
->qmat
[i
].chroma_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
+ uvac_delta
, 7)];
326 s
->qmat
[i
].luma_dc_qmul
[1] = FFMAX(s
->qmat
[i
].luma_dc_qmul
[1], 8);
327 s
->qmat
[i
].chroma_qmul
[0] = FFMIN(s
->qmat
[i
].chroma_qmul
[0], 132);
332 * Determine which buffers golden and altref should be updated with after this frame.
333 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
335 * Intra frames update all 3 references
336 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
337 * If the update (golden|altref) flag is set, it's updated with the current frame
338 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
339 * If the flag is not set, the number read means:
341 * 1: VP56_FRAME_PREVIOUS
342 * 2: update golden with altref, or update altref with golden
344 static VP56Frame
ref_to_update(VP8Context
*s
, int update
, VP56Frame ref
)
346 VP56RangeCoder
*c
= &s
->c
;
349 return VP56_FRAME_CURRENT
;
351 switch (vp8_rac_get_uint(c
, 2)) {
353 return VP56_FRAME_PREVIOUS
;
355 return (ref
== VP56_FRAME_GOLDEN
) ? VP56_FRAME_GOLDEN2
: VP56_FRAME_GOLDEN
;
357 return VP56_FRAME_NONE
;
360 static void vp78_reset_probability_tables(VP8Context
*s
)
363 for (i
= 0; i
< 4; i
++)
364 for (j
= 0; j
< 16; j
++)
365 memcpy(s
->prob
->token
[i
][j
], vp8_token_default_probs
[i
][vp8_coeff_band
[j
]],
366 sizeof(s
->prob
->token
[i
][j
]));
369 static void vp78_update_probability_tables(VP8Context
*s
)
371 VP56RangeCoder
*c
= &s
->c
;
374 for (i
= 0; i
< 4; i
++)
375 for (j
= 0; j
< 8; j
++)
376 for (k
= 0; k
< 3; k
++)
377 for (l
= 0; l
< NUM_DCT_TOKENS
-1; l
++)
378 if (vp56_rac_get_prob_branchy(c
, vp8_token_update_probs
[i
][j
][k
][l
])) {
379 int prob
= vp8_rac_get_uint(c
, 8);
380 for (m
= 0; vp8_coeff_band_indexes
[j
][m
] >= 0; m
++)
381 s
->prob
->token
[i
][vp8_coeff_band_indexes
[j
][m
]][k
][l
] = prob
;
385 #define VP7_MVC_SIZE 17
386 #define VP8_MVC_SIZE 19
388 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context
*s
,
391 VP56RangeCoder
*c
= &s
->c
;
395 for (i
= 0; i
< 4; i
++)
396 s
->prob
->pred16x16
[i
] = vp8_rac_get_uint(c
, 8);
398 for (i
= 0; i
< 3; i
++)
399 s
->prob
->pred8x8c
[i
] = vp8_rac_get_uint(c
, 8);
401 // 17.2 MV probability update
402 for (i
= 0; i
< 2; i
++)
403 for (j
= 0; j
< mvc_size
; j
++)
404 if (vp56_rac_get_prob_branchy(c
, vp8_mv_update_prob
[i
][j
]))
405 s
->prob
->mvc
[i
][j
] = vp8_rac_get_nn(c
);
408 static void update_refs(VP8Context
*s
)
410 VP56RangeCoder
*c
= &s
->c
;
412 int update_golden
= vp8_rac_get(c
);
413 int update_altref
= vp8_rac_get(c
);
415 s
->update_golden
= ref_to_update(s
, update_golden
, VP56_FRAME_GOLDEN
);
416 s
->update_altref
= ref_to_update(s
, update_altref
, VP56_FRAME_GOLDEN2
);
419 static void copy_chroma(AVFrame
*dst
, AVFrame
*src
, int width
, int height
)
423 for (j
= 1; j
< 3; j
++) {
424 for (i
= 0; i
< height
/ 2; i
++)
425 memcpy(dst
->data
[j
] + i
* dst
->linesize
[j
],
426 src
->data
[j
] + i
* src
->linesize
[j
], width
/ 2);
/**
 * Apply the VP7 fade: out = clip(y + y*beta/256 + alpha) for every luma
 * sample, reading from src and writing to dst.
 */
static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
444 static int vp7_fade_frame(VP8Context
*s
, VP56RangeCoder
*c
)
446 int alpha
= (int8_t) vp8_rac_get_uint(c
, 8);
447 int beta
= (int8_t) vp8_rac_get_uint(c
, 8);
450 if (!s
->keyframe
&& (alpha
|| beta
)) {
451 int width
= s
->mb_width
* 16;
452 int height
= s
->mb_height
* 16;
455 if (!s
->framep
[VP56_FRAME_PREVIOUS
] ||
456 !s
->framep
[VP56_FRAME_GOLDEN
]) {
457 av_log(s
->avctx
, AV_LOG_WARNING
, "Discarding interframe without a prior keyframe!\n");
458 return AVERROR_INVALIDDATA
;
462 src
= s
->framep
[VP56_FRAME_PREVIOUS
]->tf
.f
;
464 /* preserve the golden frame, write a new previous frame */
465 if (s
->framep
[VP56_FRAME_GOLDEN
] == s
->framep
[VP56_FRAME_PREVIOUS
]) {
466 s
->framep
[VP56_FRAME_PREVIOUS
] = vp8_find_free_buffer(s
);
467 if ((ret
= vp8_alloc_frame(s
, s
->framep
[VP56_FRAME_PREVIOUS
], 1)) < 0)
470 dst
= s
->framep
[VP56_FRAME_PREVIOUS
]->tf
.f
;
472 copy_chroma(dst
, src
, width
, height
);
475 fade(dst
->data
[0], dst
->linesize
[0],
476 src
->data
[0], src
->linesize
[0],
477 width
, height
, alpha
, beta
);
483 static int vp7_decode_frame_header(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
485 VP56RangeCoder
*c
= &s
->c
;
486 int part1_size
, hscale
, vscale
, i
, j
, ret
;
487 int width
= s
->avctx
->width
;
488 int height
= s
->avctx
->height
;
490 s
->profile
= (buf
[0] >> 1) & 7;
491 if (s
->profile
> 1) {
492 avpriv_request_sample(s
->avctx
, "Unknown profile %d", s
->profile
);
493 return AVERROR_INVALIDDATA
;
496 s
->keyframe
= !(buf
[0] & 1);
498 part1_size
= AV_RL24(buf
) >> 4;
500 if (buf_size
< 4 - s
->profile
+ part1_size
) {
501 av_log(s
->avctx
, AV_LOG_ERROR
, "Buffer size %d is too small, needed : %d\n", buf_size
, 4 - s
->profile
+ part1_size
);
502 return AVERROR_INVALIDDATA
;
505 buf
+= 4 - s
->profile
;
506 buf_size
-= 4 - s
->profile
;
508 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_epel_pixels_tab
, sizeof(s
->put_pixels_tab
));
510 ff_vp56_init_range_decoder(c
, buf
, part1_size
);
512 buf_size
-= part1_size
;
514 /* A. Dimension information (keyframes only) */
516 width
= vp8_rac_get_uint(c
, 12);
517 height
= vp8_rac_get_uint(c
, 12);
518 hscale
= vp8_rac_get_uint(c
, 2);
519 vscale
= vp8_rac_get_uint(c
, 2);
520 if (hscale
|| vscale
)
521 avpriv_request_sample(s
->avctx
, "Upscaling");
523 s
->update_golden
= s
->update_altref
= VP56_FRAME_CURRENT
;
524 vp78_reset_probability_tables(s
);
525 memcpy(s
->prob
->pred16x16
, vp8_pred16x16_prob_inter
,
526 sizeof(s
->prob
->pred16x16
));
527 memcpy(s
->prob
->pred8x8c
, vp8_pred8x8c_prob_inter
,
528 sizeof(s
->prob
->pred8x8c
));
529 for (i
= 0; i
< 2; i
++)
530 memcpy(s
->prob
->mvc
[i
], vp7_mv_default_prob
[i
],
531 sizeof(vp7_mv_default_prob
[i
]));
532 memset(&s
->segmentation
, 0, sizeof(s
->segmentation
));
533 memset(&s
->lf_delta
, 0, sizeof(s
->lf_delta
));
534 memcpy(s
->prob
[0].scan
, zigzag_scan
, sizeof(s
->prob
[0].scan
));
537 if (s
->keyframe
|| s
->profile
> 0)
538 memset(s
->inter_dc_pred
, 0 , sizeof(s
->inter_dc_pred
));
540 /* B. Decoding information for all four macroblock-level features */
541 for (i
= 0; i
< 4; i
++) {
542 s
->feature_enabled
[i
] = vp8_rac_get(c
);
543 if (s
->feature_enabled
[i
]) {
544 s
->feature_present_prob
[i
] = vp8_rac_get_uint(c
, 8);
546 for (j
= 0; j
< 3; j
++)
547 s
->feature_index_prob
[i
][j
] =
548 vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 8) : 255;
550 if (vp7_feature_value_size
[s
->profile
][i
])
551 for (j
= 0; j
< 4; j
++)
552 s
->feature_value
[i
][j
] =
553 vp8_rac_get(c
) ? vp8_rac_get_uint(c
, vp7_feature_value_size
[s
->profile
][i
]) : 0;
557 s
->segmentation
.enabled
= 0;
558 s
->segmentation
.update_map
= 0;
559 s
->lf_delta
.enabled
= 0;
561 s
->num_coeff_partitions
= 1;
562 ff_vp56_init_range_decoder(&s
->coeff_partition
[0], buf
, buf_size
);
564 if (!s
->macroblocks_base
|| /* first frame */
565 width
!= s
->avctx
->width
|| height
!= s
->avctx
->height
||
566 (width
+ 15) / 16 != s
->mb_width
|| (height
+ 15) / 16 != s
->mb_height
) {
567 if ((ret
= vp7_update_dimensions(s
, width
, height
)) < 0)
571 /* C. Dequantization indices */
574 /* D. Golden frame update flag (a Flag) for interframes only */
576 s
->update_golden
= vp8_rac_get(c
) ? VP56_FRAME_CURRENT
: VP56_FRAME_NONE
;
577 s
->sign_bias
[VP56_FRAME_GOLDEN
] = 0;
581 s
->update_probabilities
= 1;
584 if (s
->profile
> 0) {
585 s
->update_probabilities
= vp8_rac_get(c
);
586 if (!s
->update_probabilities
)
587 s
->prob
[1] = s
->prob
[0];
590 s
->fade_present
= vp8_rac_get(c
);
593 /* E. Fading information for previous frame */
594 if (s
->fade_present
&& vp8_rac_get(c
)) {
595 if ((ret
= vp7_fade_frame(s
,c
)) < 0)
599 /* F. Loop filter type */
601 s
->filter
.simple
= vp8_rac_get(c
);
603 /* G. DCT coefficient ordering specification */
605 for (i
= 1; i
< 16; i
++)
606 s
->prob
[0].scan
[i
] = zigzag_scan
[vp8_rac_get_uint(c
, 4)];
608 /* H. Loop filter levels */
610 s
->filter
.simple
= vp8_rac_get(c
);
611 s
->filter
.level
= vp8_rac_get_uint(c
, 6);
612 s
->filter
.sharpness
= vp8_rac_get_uint(c
, 3);
614 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
615 vp78_update_probability_tables(s
);
617 s
->mbskip_enabled
= 0;
619 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
621 s
->prob
->intra
= vp8_rac_get_uint(c
, 8);
622 s
->prob
->last
= vp8_rac_get_uint(c
, 8);
623 vp78_update_pred16x16_pred8x8_mvc_probabilities(s
, VP7_MVC_SIZE
);
629 static int vp8_decode_frame_header(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
631 VP56RangeCoder
*c
= &s
->c
;
632 int header_size
, hscale
, vscale
, ret
;
633 int width
= s
->avctx
->width
;
634 int height
= s
->avctx
->height
;
636 s
->keyframe
= !(buf
[0] & 1);
637 s
->profile
= (buf
[0]>>1) & 7;
638 s
->invisible
= !(buf
[0] & 0x10);
639 header_size
= AV_RL24(buf
) >> 5;
644 av_log(s
->avctx
, AV_LOG_WARNING
, "Unknown profile %d\n", s
->profile
);
647 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_epel_pixels_tab
,
648 sizeof(s
->put_pixels_tab
));
649 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
650 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_bilinear_pixels_tab
,
651 sizeof(s
->put_pixels_tab
));
653 if (header_size
> buf_size
- 7 * s
->keyframe
) {
654 av_log(s
->avctx
, AV_LOG_ERROR
, "Header size larger than data provided\n");
655 return AVERROR_INVALIDDATA
;
659 if (AV_RL24(buf
) != 0x2a019d) {
660 av_log(s
->avctx
, AV_LOG_ERROR
,
661 "Invalid start code 0x%x\n", AV_RL24(buf
));
662 return AVERROR_INVALIDDATA
;
664 width
= AV_RL16(buf
+ 3) & 0x3fff;
665 height
= AV_RL16(buf
+ 5) & 0x3fff;
666 hscale
= buf
[4] >> 6;
667 vscale
= buf
[6] >> 6;
671 if (hscale
|| vscale
)
672 avpriv_request_sample(s
->avctx
, "Upscaling");
674 s
->update_golden
= s
->update_altref
= VP56_FRAME_CURRENT
;
675 vp78_reset_probability_tables(s
);
676 memcpy(s
->prob
->pred16x16
, vp8_pred16x16_prob_inter
,
677 sizeof(s
->prob
->pred16x16
));
678 memcpy(s
->prob
->pred8x8c
, vp8_pred8x8c_prob_inter
,
679 sizeof(s
->prob
->pred8x8c
));
680 memcpy(s
->prob
->mvc
, vp8_mv_default_prob
,
681 sizeof(s
->prob
->mvc
));
682 memset(&s
->segmentation
, 0, sizeof(s
->segmentation
));
683 memset(&s
->lf_delta
, 0, sizeof(s
->lf_delta
));
686 ff_vp56_init_range_decoder(c
, buf
, header_size
);
688 buf_size
-= header_size
;
691 s
->colorspace
= vp8_rac_get(c
);
693 av_log(s
->avctx
, AV_LOG_WARNING
, "Unspecified colorspace\n");
694 s
->fullrange
= vp8_rac_get(c
);
697 if ((s
->segmentation
.enabled
= vp8_rac_get(c
)))
698 parse_segment_info(s
);
700 s
->segmentation
.update_map
= 0; // FIXME: move this to some init function?
702 s
->filter
.simple
= vp8_rac_get(c
);
703 s
->filter
.level
= vp8_rac_get_uint(c
, 6);
704 s
->filter
.sharpness
= vp8_rac_get_uint(c
, 3);
706 if ((s
->lf_delta
.enabled
= vp8_rac_get(c
)))
710 if (setup_partitions(s
, buf
, buf_size
)) {
711 av_log(s
->avctx
, AV_LOG_ERROR
, "Invalid partitions\n");
712 return AVERROR_INVALIDDATA
;
715 if (!s
->macroblocks_base
|| /* first frame */
716 width
!= s
->avctx
->width
|| height
!= s
->avctx
->height
||
717 (width
+15)/16 != s
->mb_width
|| (height
+15)/16 != s
->mb_height
)
718 if ((ret
= vp8_update_dimensions(s
, width
, height
)) < 0)
725 s
->sign_bias
[VP56_FRAME_GOLDEN
] = vp8_rac_get(c
);
726 s
->sign_bias
[VP56_FRAME_GOLDEN2
/* altref */] = vp8_rac_get(c
);
729 // if we aren't saving this frame's probabilities for future frames,
730 // make a copy of the current probabilities
731 if (!(s
->update_probabilities
= vp8_rac_get(c
)))
732 s
->prob
[1] = s
->prob
[0];
734 s
->update_last
= s
->keyframe
|| vp8_rac_get(c
);
736 vp78_update_probability_tables(s
);
738 if ((s
->mbskip_enabled
= vp8_rac_get(c
)))
739 s
->prob
->mbskip
= vp8_rac_get_uint(c
, 8);
742 s
->prob
->intra
= vp8_rac_get_uint(c
, 8);
743 s
->prob
->last
= vp8_rac_get_uint(c
, 8);
744 s
->prob
->golden
= vp8_rac_get_uint(c
, 8);
745 vp78_update_pred16x16_pred8x8_mvc_probabilities(s
, VP8_MVC_SIZE
);
751 static av_always_inline
752 void clamp_mv(VP8Context
*s
, VP56mv
*dst
, const VP56mv
*src
)
754 dst
->x
= av_clip(src
->x
, s
->mv_min
.x
, s
->mv_max
.x
);
755 dst
->y
= av_clip(src
->y
, s
->mv_min
.y
, s
->mv_max
.y
);
759 * Motion vector coding, 17.1.
761 static av_always_inline
int read_mv_component(VP56RangeCoder
*c
, const uint8_t *p
, int vp7
)
765 if (vp56_rac_get_prob_branchy(c
, p
[0])) {
768 for (i
= 0; i
< 3; i
++)
769 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
770 for (i
= (vp7
? 7 : 9); i
> 3; i
--)
771 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
772 if (!(x
& (vp7
? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c
, p
[12]))
776 const uint8_t *ps
= p
+ 2;
777 bit
= vp56_rac_get_prob(c
, *ps
);
780 bit
= vp56_rac_get_prob(c
, *ps
);
783 x
+= vp56_rac_get_prob(c
, *ps
);
786 return (x
&& vp56_rac_get_prob(c
, p
[1])) ? -x
: x
;
789 static int vp7_read_mv_component(VP56RangeCoder
*c
, const uint8_t *p
)
791 return read_mv_component(c
, p
, 1);
794 static int vp8_read_mv_component(VP56RangeCoder
*c
, const uint8_t *p
)
796 return read_mv_component(c
, p
, 0);
799 static av_always_inline
800 const uint8_t *get_submv_prob(uint32_t left
, uint32_t top
, int is_vp7
)
803 return vp7_submv_prob
;
806 return vp8_submv_prob
[4 - !!left
];
808 return vp8_submv_prob
[2];
809 return vp8_submv_prob
[1 - !!left
];
813 * Split motion vector prediction, 16.4.
814 * @returns the number of motion vectors parsed (2, 4 or 16)
816 static av_always_inline
817 int decode_splitmvs(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
,
818 int layout
, int is_vp7
)
822 VP8Macroblock
*top_mb
;
823 VP8Macroblock
*left_mb
= &mb
[-1];
824 const uint8_t *mbsplits_left
= vp8_mbsplits
[left_mb
->partitioning
];
825 const uint8_t *mbsplits_top
, *mbsplits_cur
, *firstidx
;
827 VP56mv
*left_mv
= left_mb
->bmv
;
828 VP56mv
*cur_mv
= mb
->bmv
;
830 if (!layout
) // layout is inlined, s->mb_layout is not
833 top_mb
= &mb
[-s
->mb_width
- 1];
834 mbsplits_top
= vp8_mbsplits
[top_mb
->partitioning
];
835 top_mv
= top_mb
->bmv
;
837 if (vp56_rac_get_prob_branchy(c
, vp8_mbsplit_prob
[0])) {
838 if (vp56_rac_get_prob_branchy(c
, vp8_mbsplit_prob
[1]))
839 part_idx
= VP8_SPLITMVMODE_16x8
+ vp56_rac_get_prob(c
, vp8_mbsplit_prob
[2]);
841 part_idx
= VP8_SPLITMVMODE_8x8
;
843 part_idx
= VP8_SPLITMVMODE_4x4
;
846 num
= vp8_mbsplit_count
[part_idx
];
847 mbsplits_cur
= vp8_mbsplits
[part_idx
],
848 firstidx
= vp8_mbfirstidx
[part_idx
];
849 mb
->partitioning
= part_idx
;
851 for (n
= 0; n
< num
; n
++) {
853 uint32_t left
, above
;
854 const uint8_t *submv_prob
;
857 left
= AV_RN32A(&left_mv
[mbsplits_left
[k
+ 3]]);
859 left
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 1]]);
861 above
= AV_RN32A(&top_mv
[mbsplits_top
[k
+ 12]]);
863 above
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 4]]);
865 submv_prob
= get_submv_prob(left
, above
, is_vp7
);
867 if (vp56_rac_get_prob_branchy(c
, submv_prob
[0])) {
868 if (vp56_rac_get_prob_branchy(c
, submv_prob
[1])) {
869 if (vp56_rac_get_prob_branchy(c
, submv_prob
[2])) {
870 mb
->bmv
[n
].y
= mb
->mv
.y
+
871 read_mv_component(c
, s
->prob
->mvc
[0], is_vp7
);
872 mb
->bmv
[n
].x
= mb
->mv
.x
+
873 read_mv_component(c
, s
->prob
->mvc
[1], is_vp7
);
875 AV_ZERO32(&mb
->bmv
[n
]);
878 AV_WN32A(&mb
->bmv
[n
], above
);
881 AV_WN32A(&mb
->bmv
[n
], left
);
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
912 static const VP56mv
*get_bmv_ptr(const VP8Macroblock
*mb
, int subblock
)
914 return &mb
->bmv
[mb
->mode
== VP8_MVMODE_SPLIT
? vp8_mbsplits
[mb
->partitioning
][subblock
] : 0];
917 static av_always_inline
918 void vp7_decode_mvs(VP8Context
*s
, VP8Macroblock
*mb
,
919 int mb_x
, int mb_y
, int layout
)
921 VP8Macroblock
*mb_edge
[12];
922 enum { CNT_ZERO
, CNT_NEAREST
, CNT_NEAR
};
923 enum { VP8_EDGE_TOP
, VP8_EDGE_LEFT
, VP8_EDGE_TOPLEFT
};
926 uint8_t cnt
[3] = { 0 };
927 VP56RangeCoder
*c
= &s
->c
;
930 AV_ZERO32(&near_mv
[0]);
931 AV_ZERO32(&near_mv
[1]);
932 AV_ZERO32(&near_mv
[2]);
934 for (i
= 0; i
< VP7_MV_PRED_COUNT
; i
++) {
935 const VP7MVPred
* pred
= &vp7_mv_pred
[i
];
938 if (vp7_calculate_mb_offset(mb_x
, mb_y
, s
->mb_width
, pred
->xoffset
,
939 pred
->yoffset
, !s
->profile
, &edge_x
, &edge_y
)) {
940 VP8Macroblock
*edge
= mb_edge
[i
] = (s
->mb_layout
== 1)
941 ? s
->macroblocks_base
+ 1 + edge_x
+
942 (s
->mb_width
+ 1) * (edge_y
+ 1)
943 : s
->macroblocks
+ edge_x
+
944 (s
->mb_height
- edge_y
- 1) * 2;
945 uint32_t mv
= AV_RN32A(get_bmv_ptr(edge
, vp7_mv_pred
[i
].subblock
));
947 if (AV_RN32A(&near_mv
[CNT_NEAREST
])) {
948 if (mv
== AV_RN32A(&near_mv
[CNT_NEAREST
])) {
950 } else if (AV_RN32A(&near_mv
[CNT_NEAR
])) {
951 if (mv
!= AV_RN32A(&near_mv
[CNT_NEAR
]))
955 AV_WN32A(&near_mv
[CNT_NEAR
], mv
);
959 AV_WN32A(&near_mv
[CNT_NEAREST
], mv
);
968 cnt
[idx
] += vp7_mv_pred
[i
].score
;
971 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
973 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_ZERO
]][0])) {
974 mb
->mode
= VP8_MVMODE_MV
;
976 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAREST
]][1])) {
978 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAR
]][2])) {
980 if (cnt
[CNT_NEAREST
] > cnt
[CNT_NEAR
])
981 AV_WN32A(&mb
->mv
, cnt
[CNT_ZERO
] > cnt
[CNT_NEAREST
] ? 0 : AV_RN32A(&near_mv
[CNT_NEAREST
]));
983 AV_WN32A(&mb
->mv
, cnt
[CNT_ZERO
] > cnt
[CNT_NEAR
] ? 0 : AV_RN32A(&near_mv
[CNT_NEAR
]));
985 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAR
]][3])) {
986 mb
->mode
= VP8_MVMODE_SPLIT
;
987 mb
->mv
= mb
->bmv
[decode_splitmvs(s
, c
, mb
, layout
, IS_VP7
) - 1];
989 mb
->mv
.y
+= vp7_read_mv_component(c
, s
->prob
->mvc
[0]);
990 mb
->mv
.x
+= vp7_read_mv_component(c
, s
->prob
->mvc
[1]);
994 mb
->mv
= near_mv
[CNT_NEAR
];
998 mb
->mv
= near_mv
[CNT_NEAREST
];
1002 mb
->mode
= VP8_MVMODE_ZERO
;
1004 mb
->bmv
[0] = mb
->mv
;
1008 static av_always_inline
1009 void vp8_decode_mvs(VP8Context
*s
, VP8Macroblock
*mb
,
1010 int mb_x
, int mb_y
, int layout
)
1012 VP8Macroblock
*mb_edge
[3] = { 0 /* top */,
1015 enum { CNT_ZERO
, CNT_NEAREST
, CNT_NEAR
, CNT_SPLITMV
};
1016 enum { VP8_EDGE_TOP
, VP8_EDGE_LEFT
, VP8_EDGE_TOPLEFT
};
1018 int cur_sign_bias
= s
->sign_bias
[mb
->ref_frame
];
1019 int8_t *sign_bias
= s
->sign_bias
;
1021 uint8_t cnt
[4] = { 0 };
1022 VP56RangeCoder
*c
= &s
->c
;
1024 if (!layout
) { // layout is inlined (s->mb_layout is not)
1025 mb_edge
[0] = mb
+ 2;
1026 mb_edge
[2] = mb
+ 1;
1028 mb_edge
[0] = mb
- s
->mb_width
- 1;
1029 mb_edge
[2] = mb
- s
->mb_width
- 2;
1032 AV_ZERO32(&near_mv
[0]);
1033 AV_ZERO32(&near_mv
[1]);
1034 AV_ZERO32(&near_mv
[2]);
1036 /* Process MB on top, left and top-left */
1037 #define MV_EDGE_CHECK(n) \
1039 VP8Macroblock *edge = mb_edge[n]; \
1040 int edge_ref = edge->ref_frame; \
1041 if (edge_ref != VP56_FRAME_CURRENT) { \
1042 uint32_t mv = AV_RN32A(&edge->mv); \
1044 if (cur_sign_bias != sign_bias[edge_ref]) { \
1045 /* SWAR negate of the values in mv. */ \
1047 mv = ((mv & 0x7fff7fff) + \
1048 0x00010001) ^ (mv & 0x80008000); \
1050 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1051 AV_WN32A(&near_mv[++idx], mv); \
1052 cnt[idx] += 1 + (n != 2); \
1054 cnt[CNT_ZERO] += 1 + (n != 2); \
1062 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
1063 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_ZERO
]][0])) {
1064 mb
->mode
= VP8_MVMODE_MV
;
1066 /* If we have three distinct MVs, merge first and last if they're the same */
1067 if (cnt
[CNT_SPLITMV
] &&
1068 AV_RN32A(&near_mv
[1 + VP8_EDGE_TOP
]) == AV_RN32A(&near_mv
[1 + VP8_EDGE_TOPLEFT
]))
1069 cnt
[CNT_NEAREST
] += 1;
1071 /* Swap near and nearest if necessary */
1072 if (cnt
[CNT_NEAR
] > cnt
[CNT_NEAREST
]) {
1073 FFSWAP(uint8_t, cnt
[CNT_NEAREST
], cnt
[CNT_NEAR
]);
1074 FFSWAP( VP56mv
, near_mv
[CNT_NEAREST
], near_mv
[CNT_NEAR
]);
1077 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_NEAREST
]][1])) {
1078 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_NEAR
]][2])) {
1079 /* Choose the best mv out of 0,0 and the nearest mv */
1080 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_ZERO
+ (cnt
[CNT_NEAREST
] >= cnt
[CNT_ZERO
])]);
1081 cnt
[CNT_SPLITMV
] = ((mb_edge
[VP8_EDGE_LEFT
]->mode
== VP8_MVMODE_SPLIT
) +
1082 (mb_edge
[VP8_EDGE_TOP
]->mode
== VP8_MVMODE_SPLIT
)) * 2 +
1083 (mb_edge
[VP8_EDGE_TOPLEFT
]->mode
== VP8_MVMODE_SPLIT
);
1085 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_SPLITMV
]][3])) {
1086 mb
->mode
= VP8_MVMODE_SPLIT
;
1087 mb
->mv
= mb
->bmv
[decode_splitmvs(s
, c
, mb
, layout
, IS_VP8
) - 1];
1089 mb
->mv
.y
+= vp8_read_mv_component(c
, s
->prob
->mvc
[0]);
1090 mb
->mv
.x
+= vp8_read_mv_component(c
, s
->prob
->mvc
[1]);
1091 mb
->bmv
[0] = mb
->mv
;
1094 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_NEAR
]);
1095 mb
->bmv
[0] = mb
->mv
;
1098 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_NEAREST
]);
1099 mb
->bmv
[0] = mb
->mv
;
1102 mb
->mode
= VP8_MVMODE_ZERO
;
1104 mb
->bmv
[0] = mb
->mv
;
1108 static av_always_inline
1109 void decode_intra4x4_modes(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
,
1110 int mb_x
, int keyframe
, int layout
)
1112 uint8_t *intra4x4
= mb
->intra4x4_pred_mode_mb
;
1115 VP8Macroblock
*mb_top
= mb
- s
->mb_width
- 1;
1116 memcpy(mb
->intra4x4_pred_mode_top
, mb_top
->intra4x4_pred_mode_top
, 4);
1121 uint8_t *const left
= s
->intra4x4_pred_mode_left
;
1123 top
= mb
->intra4x4_pred_mode_top
;
1125 top
= s
->intra4x4_pred_mode_top
+ 4 * mb_x
;
1126 for (y
= 0; y
< 4; y
++) {
1127 for (x
= 0; x
< 4; x
++) {
1129 ctx
= vp8_pred4x4_prob_intra
[top
[x
]][left
[y
]];
1130 *intra4x4
= vp8_rac_get_tree(c
, vp8_pred4x4_tree
, ctx
);
1131 left
[y
] = top
[x
] = *intra4x4
;
1137 for (i
= 0; i
< 16; i
++)
1138 intra4x4
[i
] = vp8_rac_get_tree(c
, vp8_pred4x4_tree
,
1139 vp8_pred4x4_prob_inter
);
1143 static av_always_inline
1144 void decode_mb_mode(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
1145 uint8_t *segment
, uint8_t *ref
, int layout
, int is_vp7
)
1147 VP56RangeCoder
*c
= &s
->c
;
1148 const char *vp7_feature_name
[] = { "q-index",
1150 "partial-golden-update",
1155 for (i
= 0; i
< 4; i
++) {
1156 if (s
->feature_enabled
[i
]) {
1157 if (vp56_rac_get_prob_branchy(c
, s
->feature_present_prob
[i
])) {
1158 int index
= vp8_rac_get_tree(c
, vp7_feature_index_tree
,
1159 s
->feature_index_prob
[i
]);
1160 av_log(s
->avctx
, AV_LOG_WARNING
,
1161 "Feature %s present in macroblock (value 0x%x)\n",
1162 vp7_feature_name
[i
], s
->feature_value
[i
][index
]);
1166 } else if (s
->segmentation
.update_map
) {
1167 int bit
= vp56_rac_get_prob(c
, s
->prob
->segmentid
[0]);
1168 *segment
= vp56_rac_get_prob(c
, s
->prob
->segmentid
[1+bit
]) + 2*bit
;
1169 } else if (s
->segmentation
.enabled
)
1170 *segment
= ref
? *ref
: *segment
;
1171 mb
->segment
= *segment
;
1173 mb
->skip
= s
->mbskip_enabled
? vp56_rac_get_prob(c
, s
->prob
->mbskip
) : 0;
1176 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_intra
,
1177 vp8_pred16x16_prob_intra
);
1179 if (mb
->mode
== MODE_I4x4
) {
1180 decode_intra4x4_modes(s
, c
, mb
, mb_x
, 1, layout
);
1182 const uint32_t modes
= (is_vp7
? vp7_pred4x4_mode
1183 : vp8_pred4x4_mode
)[mb
->mode
] * 0x01010101u
;
1185 AV_WN32A(mb
->intra4x4_pred_mode_top
, modes
);
1187 AV_WN32A(s
->intra4x4_pred_mode_top
+ 4 * mb_x
, modes
);
1188 AV_WN32A(s
->intra4x4_pred_mode_left
, modes
);
1191 mb
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
,
1192 vp8_pred8x8c_prob_intra
);
1193 mb
->ref_frame
= VP56_FRAME_CURRENT
;
1194 } else if (vp56_rac_get_prob_branchy(c
, s
->prob
->intra
)) {
1196 if (vp56_rac_get_prob_branchy(c
, s
->prob
->last
))
1198 (!is_vp7
&& vp56_rac_get_prob(c
, s
->prob
->golden
)) ? VP56_FRAME_GOLDEN2
/* altref */
1199 : VP56_FRAME_GOLDEN
;
1201 mb
->ref_frame
= VP56_FRAME_PREVIOUS
;
1202 s
->ref_count
[mb
->ref_frame
- 1]++;
1204 // motion vectors, 16.3
1206 vp7_decode_mvs(s
, mb
, mb_x
, mb_y
, layout
);
1208 vp8_decode_mvs(s
, mb
, mb_x
, mb_y
, layout
);
1211 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_inter
, s
->prob
->pred16x16
);
1213 if (mb
->mode
== MODE_I4x4
)
1214 decode_intra4x4_modes(s
, c
, mb
, mb_x
, 0, layout
);
1216 mb
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
,
1218 mb
->ref_frame
= VP56_FRAME_CURRENT
;
1219 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
1220 AV_ZERO32(&mb
->bmv
[0]);
/**
 * @param r      arithmetic bitstream reader context
 * @param block  destination for block coefficients
 * @param probs  probabilities to use when reading trees from the bitstream
 * @param i      initial coeff index, 0 unless a separate DC block is coded
 * @param qmul   array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded;
 *         otherwise, the index of the last coeff decoded plus one
 */
1234 static av_always_inline
1235 int decode_block_coeffs_internal(VP56RangeCoder
*r
, int16_t block
[16],
1236 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1237 int i
, uint8_t *token_prob
, int16_t qmul
[2],
1238 const uint8_t scan
[16], int vp7
)
1240 VP56RangeCoder c
= *r
;
1245 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[0])) // DCT_EOB
1249 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[1])) { // DCT_0
1251 break; // invalid input; blocks should end with EOB
1252 token_prob
= probs
[i
][0];
1258 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[2])) { // DCT_1
1260 token_prob
= probs
[i
+ 1][1];
1262 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[3])) { // DCT 2,3,4
1263 coeff
= vp56_rac_get_prob_branchy(&c
, token_prob
[4]);
1265 coeff
+= vp56_rac_get_prob(&c
, token_prob
[5]);
1269 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[6])) {
1270 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[7])) { // DCT_CAT1
1271 coeff
= 5 + vp56_rac_get_prob(&c
, vp8_dct_cat1_prob
[0]);
1272 } else { // DCT_CAT2
1274 coeff
+= vp56_rac_get_prob(&c
, vp8_dct_cat2_prob
[0]) << 1;
1275 coeff
+= vp56_rac_get_prob(&c
, vp8_dct_cat2_prob
[1]);
1277 } else { // DCT_CAT3 and up
1278 int a
= vp56_rac_get_prob(&c
, token_prob
[8]);
1279 int b
= vp56_rac_get_prob(&c
, token_prob
[9 + a
]);
1280 int cat
= (a
<< 1) + b
;
1281 coeff
= 3 + (8 << cat
);
1282 coeff
+= vp8_rac_get_coeff(&c
, ff_vp8_dct_cat_prob
[cat
]);
1285 token_prob
= probs
[i
+ 1][2];
1287 block
[scan
[i
]] = (vp8_rac_get(&c
) ? -coeff
: coeff
) * qmul
[!!i
];
1294 static av_always_inline
1295 int inter_predict_dc(int16_t block
[16], int16_t pred
[2])
1297 int16_t dc
= block
[0];
1305 if (!pred
[0] | !dc
| ((int32_t)pred
[0] ^ (int32_t)dc
) >> 31) {
1306 block
[0] = pred
[0] = dc
;
1311 block
[0] = pred
[0] = dc
;
1317 static int vp7_decode_block_coeffs_internal(VP56RangeCoder
*r
,
1319 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1320 int i
, uint8_t *token_prob
,
1322 const uint8_t scan
[16])
1324 return decode_block_coeffs_internal(r
, block
, probs
, i
,
1325 token_prob
, qmul
, scan
, IS_VP7
);
1328 #ifndef vp8_decode_block_coeffs_internal
1329 static int vp8_decode_block_coeffs_internal(VP56RangeCoder
*r
,
1331 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1332 int i
, uint8_t *token_prob
,
1335 return decode_block_coeffs_internal(r
, block
, probs
, i
,
1336 token_prob
, qmul
, zigzag_scan
, IS_VP8
);
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded;
 *         otherwise, the index of the last coeff decoded plus one
 */
1353 static av_always_inline
1354 int decode_block_coeffs(VP56RangeCoder
*c
, int16_t block
[16],
1355 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1356 int i
, int zero_nhood
, int16_t qmul
[2],
1357 const uint8_t scan
[16], int vp7
)
1359 uint8_t *token_prob
= probs
[i
][zero_nhood
];
1360 if (!vp56_rac_get_prob_branchy(c
, token_prob
[0])) // DCT_EOB
1362 return vp7
? vp7_decode_block_coeffs_internal(c
, block
, probs
, i
,
1363 token_prob
, qmul
, scan
)
1364 : vp8_decode_block_coeffs_internal(c
, block
, probs
, i
,
1368 static av_always_inline
1369 void decode_mb_coeffs(VP8Context
*s
, VP8ThreadData
*td
, VP56RangeCoder
*c
,
1370 VP8Macroblock
*mb
, uint8_t t_nnz
[9], uint8_t l_nnz
[9],
1373 int i
, x
, y
, luma_start
= 0, luma_ctx
= 3;
1374 int nnz_pred
, nnz
, nnz_total
= 0;
1375 int segment
= mb
->segment
;
1378 if (mb
->mode
!= MODE_I4x4
&& (is_vp7
|| mb
->mode
!= VP8_MVMODE_SPLIT
)) {
1379 nnz_pred
= t_nnz
[8] + l_nnz
[8];
1381 // decode DC values and do hadamard
1382 nnz
= decode_block_coeffs(c
, td
->block_dc
, s
->prob
->token
[1], 0,
1383 nnz_pred
, s
->qmat
[segment
].luma_dc_qmul
,
1384 zigzag_scan
, is_vp7
);
1385 l_nnz
[8] = t_nnz
[8] = !!nnz
;
1387 if (is_vp7
&& mb
->mode
> MODE_I4x4
) {
1388 nnz
|= inter_predict_dc(td
->block_dc
,
1389 s
->inter_dc_pred
[mb
->ref_frame
- 1]);
1396 s
->vp8dsp
.vp8_luma_dc_wht_dc(td
->block
, td
->block_dc
);
1398 s
->vp8dsp
.vp8_luma_dc_wht(td
->block
, td
->block_dc
);
1405 for (y
= 0; y
< 4; y
++)
1406 for (x
= 0; x
< 4; x
++) {
1407 nnz_pred
= l_nnz
[y
] + t_nnz
[x
];
1408 nnz
= decode_block_coeffs(c
, td
->block
[y
][x
],
1409 s
->prob
->token
[luma_ctx
],
1410 luma_start
, nnz_pred
,
1411 s
->qmat
[segment
].luma_qmul
,
1412 s
->prob
[0].scan
, is_vp7
);
1413 /* nnz+block_dc may be one more than the actual last index,
1414 * but we don't care */
1415 td
->non_zero_count_cache
[y
][x
] = nnz
+ block_dc
;
1416 t_nnz
[x
] = l_nnz
[y
] = !!nnz
;
1421 // TODO: what to do about dimensions? 2nd dim for luma is x,
1422 // but for chroma it's (y<<1)|x
1423 for (i
= 4; i
< 6; i
++)
1424 for (y
= 0; y
< 2; y
++)
1425 for (x
= 0; x
< 2; x
++) {
1426 nnz_pred
= l_nnz
[i
+ 2 * y
] + t_nnz
[i
+ 2 * x
];
1427 nnz
= decode_block_coeffs(c
, td
->block
[i
][(y
<< 1) + x
],
1428 s
->prob
->token
[2], 0, nnz_pred
,
1429 s
->qmat
[segment
].chroma_qmul
,
1430 s
->prob
[0].scan
, is_vp7
);
1431 td
->non_zero_count_cache
[i
][(y
<< 1) + x
] = nnz
;
1432 t_nnz
[i
+ 2 * x
] = l_nnz
[i
+ 2 * y
] = !!nnz
;
1436 // if there were no coded coeffs despite the macroblock not being marked skip,
1437 // we MUST not do the inner loop filter and should not do IDCT
1438 // Since skip isn't used for bitstream prediction, just manually set it.
1443 static av_always_inline
1444 void backup_mb_border(uint8_t *top_border
, uint8_t *src_y
,
1445 uint8_t *src_cb
, uint8_t *src_cr
,
1446 int linesize
, int uvlinesize
, int simple
)
1448 AV_COPY128(top_border
, src_y
+ 15 * linesize
);
1450 AV_COPY64(top_border
+ 16, src_cb
+ 7 * uvlinesize
);
1451 AV_COPY64(top_border
+ 24, src_cr
+ 7 * uvlinesize
);
1455 static av_always_inline
1456 void xchg_mb_border(uint8_t *top_border
, uint8_t *src_y
, uint8_t *src_cb
,
1457 uint8_t *src_cr
, int linesize
, int uvlinesize
, int mb_x
,
1458 int mb_y
, int mb_width
, int simple
, int xchg
)
1460 uint8_t *top_border_m1
= top_border
- 32; // for TL prediction
1462 src_cb
-= uvlinesize
;
1463 src_cr
-= uvlinesize
;
1465 #define XCHG(a, b, xchg) \
1473 XCHG(top_border_m1
+ 8, src_y
- 8, xchg
);
1474 XCHG(top_border
, src_y
, xchg
);
1475 XCHG(top_border
+ 8, src_y
+ 8, 1);
1476 if (mb_x
< mb_width
- 1)
1477 XCHG(top_border
+ 32, src_y
+ 16, 1);
1479 // only copy chroma for normal loop filter
1480 // or to initialize the top row to 127
1481 if (!simple
|| !mb_y
) {
1482 XCHG(top_border_m1
+ 16, src_cb
- 8, xchg
);
1483 XCHG(top_border_m1
+ 24, src_cr
- 8, xchg
);
1484 XCHG(top_border
+ 16, src_cb
, 1);
1485 XCHG(top_border
+ 24, src_cr
, 1);
1489 static av_always_inline
1490 int check_dc_pred8x8_mode(int mode
, int mb_x
, int mb_y
)
1493 return mb_y
? TOP_DC_PRED8x8
: DC_128_PRED8x8
;
1495 return mb_y
? mode
: LEFT_DC_PRED8x8
;
1498 static av_always_inline
1499 int check_tm_pred8x8_mode(int mode
, int mb_x
, int mb_y
, int vp7
)
1502 return mb_y
? VERT_PRED8x8
: (vp7
? DC_128_PRED8x8
: DC_129_PRED8x8
);
1504 return mb_y
? mode
: HOR_PRED8x8
;
1507 static av_always_inline
1508 int check_intra_pred8x8_mode_emuedge(int mode
, int mb_x
, int mb_y
, int vp7
)
1512 return check_dc_pred8x8_mode(mode
, mb_x
, mb_y
);
1514 return !mb_y
? (vp7
? DC_128_PRED8x8
: DC_127_PRED8x8
) : mode
;
1516 return !mb_x
? (vp7
? DC_128_PRED8x8
: DC_129_PRED8x8
) : mode
;
1517 case PLANE_PRED8x8
: /* TM */
1518 return check_tm_pred8x8_mode(mode
, mb_x
, mb_y
, vp7
);
1523 static av_always_inline
1524 int check_tm_pred4x4_mode(int mode
, int mb_x
, int mb_y
, int vp7
)
1527 return mb_y
? VERT_VP8_PRED
: (vp7
? DC_128_PRED
: DC_129_PRED
);
1529 return mb_y
? mode
: HOR_VP8_PRED
;
1533 static av_always_inline
1534 int check_intra_pred4x4_mode_emuedge(int mode
, int mb_x
, int mb_y
,
1535 int *copy_buf
, int vp7
)
1539 if (!mb_x
&& mb_y
) {
1544 case DIAG_DOWN_LEFT_PRED
:
1545 case VERT_LEFT_PRED
:
1546 return !mb_y
? (vp7
? DC_128_PRED
: DC_127_PRED
) : mode
;
1554 return !mb_x
? (vp7
? DC_128_PRED
: DC_129_PRED
) : mode
;
1556 return check_tm_pred4x4_mode(mode
, mb_x
, mb_y
, vp7
);
1557 case DC_PRED
: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1558 * as 16x16/8x8 DC */
1559 case DIAG_DOWN_RIGHT_PRED
:
1560 case VERT_RIGHT_PRED
:
1569 static av_always_inline
1570 void intra_predict(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1571 VP8Macroblock
*mb
, int mb_x
, int mb_y
, int is_vp7
)
1573 int x
, y
, mode
, nnz
;
1576 /* for the first row, we need to run xchg_mb_border to init the top edge
1577 * to 127 otherwise, skip it if we aren't going to deblock */
1578 if (mb_y
&& (s
->deblock_filter
|| !mb_y
) && td
->thread_nr
== 0)
1579 xchg_mb_border(s
->top_border
[mb_x
+ 1], dst
[0], dst
[1], dst
[2],
1580 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
1581 s
->filter
.simple
, 1);
1583 if (mb
->mode
< MODE_I4x4
) {
1584 mode
= check_intra_pred8x8_mode_emuedge(mb
->mode
, mb_x
, mb_y
, is_vp7
);
1585 s
->hpc
.pred16x16
[mode
](dst
[0], s
->linesize
);
1587 uint8_t *ptr
= dst
[0];
1588 uint8_t *intra4x4
= mb
->intra4x4_pred_mode_mb
;
1589 const uint8_t lo
= is_vp7
? 128 : 127;
1590 const uint8_t hi
= is_vp7
? 128 : 129;
1591 uint8_t tr_top
[4] = { lo
, lo
, lo
, lo
};
1593 // all blocks on the right edge of the macroblock use bottom edge
1594 // the top macroblock for their topright edge
1595 uint8_t *tr_right
= ptr
- s
->linesize
+ 16;
1597 // if we're on the right edge of the frame, said edge is extended
1598 // from the top macroblock
1599 if (mb_y
&& mb_x
== s
->mb_width
- 1) {
1600 tr
= tr_right
[-1] * 0x01010101u
;
1601 tr_right
= (uint8_t *) &tr
;
1605 AV_ZERO128(td
->non_zero_count_cache
);
1607 for (y
= 0; y
< 4; y
++) {
1608 uint8_t *topright
= ptr
+ 4 - s
->linesize
;
1609 for (x
= 0; x
< 4; x
++) {
1610 int copy
= 0, linesize
= s
->linesize
;
1611 uint8_t *dst
= ptr
+ 4 * x
;
1612 DECLARE_ALIGNED(4, uint8_t, copy_dst
)[5 * 8];
1614 if ((y
== 0 || x
== 3) && mb_y
== 0) {
1617 topright
= tr_right
;
1619 mode
= check_intra_pred4x4_mode_emuedge(intra4x4
[x
], mb_x
+ x
,
1620 mb_y
+ y
, ©
, is_vp7
);
1622 dst
= copy_dst
+ 12;
1626 AV_WN32A(copy_dst
+ 4, lo
* 0x01010101U
);
1628 AV_COPY32(copy_dst
+ 4, ptr
+ 4 * x
- s
->linesize
);
1632 copy_dst
[3] = ptr
[4 * x
- s
->linesize
- 1];
1641 copy_dst
[11] = ptr
[4 * x
- 1];
1642 copy_dst
[19] = ptr
[4 * x
+ s
->linesize
- 1];
1643 copy_dst
[27] = ptr
[4 * x
+ s
->linesize
* 2 - 1];
1644 copy_dst
[35] = ptr
[4 * x
+ s
->linesize
* 3 - 1];
1647 s
->hpc
.pred4x4
[mode
](dst
, topright
, linesize
);
1649 AV_COPY32(ptr
+ 4 * x
, copy_dst
+ 12);
1650 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
, copy_dst
+ 20);
1651 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
* 2, copy_dst
+ 28);
1652 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
* 3, copy_dst
+ 36);
1655 nnz
= td
->non_zero_count_cache
[y
][x
];
1658 s
->vp8dsp
.vp8_idct_dc_add(ptr
+ 4 * x
,
1659 td
->block
[y
][x
], s
->linesize
);
1661 s
->vp8dsp
.vp8_idct_add(ptr
+ 4 * x
,
1662 td
->block
[y
][x
], s
->linesize
);
1667 ptr
+= 4 * s
->linesize
;
1672 mode
= check_intra_pred8x8_mode_emuedge(mb
->chroma_pred_mode
,
1673 mb_x
, mb_y
, is_vp7
);
1674 s
->hpc
.pred8x8
[mode
](dst
[1], s
->uvlinesize
);
1675 s
->hpc
.pred8x8
[mode
](dst
[2], s
->uvlinesize
);
1677 if (mb_y
&& (s
->deblock_filter
|| !mb_y
) && td
->thread_nr
== 0)
1678 xchg_mb_border(s
->top_border
[mb_x
+ 1], dst
[0], dst
[1], dst
[2],
1679 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
1680 s
->filter
.simple
, 0);
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1693 * @param s VP8 decoding context
1694 * @param dst target buffer for block data at block position
1695 * @param ref reference picture buffer at origin (0, 0)
1696 * @param mv motion vector (relative to block position) to get pixel data from
1697 * @param x_off horizontal position of block from origin (0, 0)
1698 * @param y_off vertical position of block from origin (0, 0)
1699 * @param block_w width of block (16, 8 or 4)
1700 * @param block_h height of block (always same as block_w)
1701 * @param width width of src/dst plane data
1702 * @param height height of src/dst plane data
1703 * @param linesize size of a single line of plane data, including padding
1704 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1706 static av_always_inline
1707 void vp8_mc_luma(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
,
1708 ThreadFrame
*ref
, const VP56mv
*mv
,
1709 int x_off
, int y_off
, int block_w
, int block_h
,
1710 int width
, int height
, ptrdiff_t linesize
,
1711 vp8_mc_func mc_func
[3][3])
1713 uint8_t *src
= ref
->f
->data
[0];
1716 int src_linesize
= linesize
;
1718 int mx
= (mv
->x
<< 1) & 7, mx_idx
= subpel_idx
[0][mx
];
1719 int my
= (mv
->y
<< 1) & 7, my_idx
= subpel_idx
[0][my
];
1721 x_off
+= mv
->x
>> 2;
1722 y_off
+= mv
->y
>> 2;
1725 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
+ subpel_idx
[2][my
]) >> 4, 0);
1726 src
+= y_off
* linesize
+ x_off
;
1727 if (x_off
< mx_idx
|| x_off
>= width
- block_w
- subpel_idx
[2][mx
] ||
1728 y_off
< my_idx
|| y_off
>= height
- block_h
- subpel_idx
[2][my
]) {
1729 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1730 src
- my_idx
* linesize
- mx_idx
,
1731 EDGE_EMU_LINESIZE
, linesize
,
1732 block_w
+ subpel_idx
[1][mx
],
1733 block_h
+ subpel_idx
[1][my
],
1734 x_off
- mx_idx
, y_off
- my_idx
,
1736 src
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1737 src_linesize
= EDGE_EMU_LINESIZE
;
1739 mc_func
[my_idx
][mx_idx
](dst
, linesize
, src
, src_linesize
, block_h
, mx
, my
);
1741 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
) >> 4, 0);
1742 mc_func
[0][0](dst
, linesize
, src
+ y_off
* linesize
+ x_off
,
1743 linesize
, block_h
, 0, 0);
1748 * chroma MC function
1750 * @param s VP8 decoding context
1751 * @param dst1 target buffer for block data at block position (U plane)
1752 * @param dst2 target buffer for block data at block position (V plane)
1753 * @param ref reference picture buffer at origin (0, 0)
1754 * @param mv motion vector (relative to block position) to get pixel data from
1755 * @param x_off horizontal position of block from origin (0, 0)
1756 * @param y_off vertical position of block from origin (0, 0)
1757 * @param block_w width of block (16, 8 or 4)
1758 * @param block_h height of block (always same as block_w)
1759 * @param width width of src/dst plane data
1760 * @param height height of src/dst plane data
1761 * @param linesize size of a single line of plane data, including padding
1762 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1764 static av_always_inline
1765 void vp8_mc_chroma(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst1
,
1766 uint8_t *dst2
, ThreadFrame
*ref
, const VP56mv
*mv
,
1767 int x_off
, int y_off
, int block_w
, int block_h
,
1768 int width
, int height
, ptrdiff_t linesize
,
1769 vp8_mc_func mc_func
[3][3])
1771 uint8_t *src1
= ref
->f
->data
[1], *src2
= ref
->f
->data
[2];
1774 int mx
= mv
->x
& 7, mx_idx
= subpel_idx
[0][mx
];
1775 int my
= mv
->y
& 7, my_idx
= subpel_idx
[0][my
];
1777 x_off
+= mv
->x
>> 3;
1778 y_off
+= mv
->y
>> 3;
1781 src1
+= y_off
* linesize
+ x_off
;
1782 src2
+= y_off
* linesize
+ x_off
;
1783 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
+ subpel_idx
[2][my
]) >> 3, 0);
1784 if (x_off
< mx_idx
|| x_off
>= width
- block_w
- subpel_idx
[2][mx
] ||
1785 y_off
< my_idx
|| y_off
>= height
- block_h
- subpel_idx
[2][my
]) {
1786 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1787 src1
- my_idx
* linesize
- mx_idx
,
1788 EDGE_EMU_LINESIZE
, linesize
,
1789 block_w
+ subpel_idx
[1][mx
],
1790 block_h
+ subpel_idx
[1][my
],
1791 x_off
- mx_idx
, y_off
- my_idx
, width
, height
);
1792 src1
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1793 mc_func
[my_idx
][mx_idx
](dst1
, linesize
, src1
, EDGE_EMU_LINESIZE
, block_h
, mx
, my
);
1795 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1796 src2
- my_idx
* linesize
- mx_idx
,
1797 EDGE_EMU_LINESIZE
, linesize
,
1798 block_w
+ subpel_idx
[1][mx
],
1799 block_h
+ subpel_idx
[1][my
],
1800 x_off
- mx_idx
, y_off
- my_idx
, width
, height
);
1801 src2
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1802 mc_func
[my_idx
][mx_idx
](dst2
, linesize
, src2
, EDGE_EMU_LINESIZE
, block_h
, mx
, my
);
1804 mc_func
[my_idx
][mx_idx
](dst1
, linesize
, src1
, linesize
, block_h
, mx
, my
);
1805 mc_func
[my_idx
][mx_idx
](dst2
, linesize
, src2
, linesize
, block_h
, mx
, my
);
1808 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
) >> 3, 0);
1809 mc_func
[0][0](dst1
, linesize
, src1
+ y_off
* linesize
+ x_off
, linesize
, block_h
, 0, 0);
1810 mc_func
[0][0](dst2
, linesize
, src2
+ y_off
* linesize
+ x_off
, linesize
, block_h
, 0, 0);
1814 static av_always_inline
1815 void vp8_mc_part(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1816 ThreadFrame
*ref_frame
, int x_off
, int y_off
,
1817 int bx_off
, int by_off
, int block_w
, int block_h
,
1818 int width
, int height
, VP56mv
*mv
)
1823 vp8_mc_luma(s
, td
, dst
[0] + by_off
* s
->linesize
+ bx_off
,
1824 ref_frame
, mv
, x_off
+ bx_off
, y_off
+ by_off
,
1825 block_w
, block_h
, width
, height
, s
->linesize
,
1826 s
->put_pixels_tab
[block_w
== 8]);
1829 if (s
->profile
== 3) {
1830 /* this block only applies VP8; it is safe to check
1831 * only the profile, as VP7 profile <= 1 */
1843 vp8_mc_chroma(s
, td
, dst
[1] + by_off
* s
->uvlinesize
+ bx_off
,
1844 dst
[2] + by_off
* s
->uvlinesize
+ bx_off
, ref_frame
,
1845 &uvmv
, x_off
+ bx_off
, y_off
+ by_off
,
1846 block_w
, block_h
, width
, height
, s
->uvlinesize
,
1847 s
->put_pixels_tab
[1 + (block_w
== 4)]);
1850 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1851 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1852 static av_always_inline
1853 void prefetch_motion(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
1856 /* Don't prefetch refs that haven't been used very often this frame. */
1857 if (s
->ref_count
[ref
- 1] > (mb_xy
>> 5)) {
1858 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1859 int mx
= (mb
->mv
.x
>> 2) + x_off
+ 8;
1860 int my
= (mb
->mv
.y
>> 2) + y_off
;
1861 uint8_t **src
= s
->framep
[ref
]->tf
.f
->data
;
1862 int off
= mx
+ (my
+ (mb_x
& 3) * 4) * s
->linesize
+ 64;
1863 /* For threading, a ff_thread_await_progress here might be useful, but
1864 * it actually slows down the decoder. Since a bad prefetch doesn't
1865 * generate bad decoder output, we don't run it here. */
1866 s
->vdsp
.prefetch(src
[0] + off
, s
->linesize
, 4);
1867 off
= (mx
>> 1) + ((my
>> 1) + (mb_x
& 7)) * s
->uvlinesize
+ 64;
1868 s
->vdsp
.prefetch(src
[1] + off
, src
[2] - src
[1], 2);
1873 * Apply motion vectors to prediction buffer, chapter 18.
1875 static av_always_inline
1876 void inter_predict(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1877 VP8Macroblock
*mb
, int mb_x
, int mb_y
)
1879 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1880 int width
= 16 * s
->mb_width
, height
= 16 * s
->mb_height
;
1881 ThreadFrame
*ref
= &s
->framep
[mb
->ref_frame
]->tf
;
1882 VP56mv
*bmv
= mb
->bmv
;
1884 switch (mb
->partitioning
) {
1885 case VP8_SPLITMVMODE_NONE
:
1886 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1887 0, 0, 16, 16, width
, height
, &mb
->mv
);
1889 case VP8_SPLITMVMODE_4x4
: {
1894 for (y
= 0; y
< 4; y
++) {
1895 for (x
= 0; x
< 4; x
++) {
1896 vp8_mc_luma(s
, td
, dst
[0] + 4 * y
* s
->linesize
+ x
* 4,
1897 ref
, &bmv
[4 * y
+ x
],
1898 4 * x
+ x_off
, 4 * y
+ y_off
, 4, 4,
1899 width
, height
, s
->linesize
,
1900 s
->put_pixels_tab
[2]);
1909 for (y
= 0; y
< 2; y
++) {
1910 for (x
= 0; x
< 2; x
++) {
1911 uvmv
.x
= mb
->bmv
[2 * y
* 4 + 2 * x
].x
+
1912 mb
->bmv
[2 * y
* 4 + 2 * x
+ 1].x
+
1913 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
].x
+
1914 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
+ 1].x
;
1915 uvmv
.y
= mb
->bmv
[2 * y
* 4 + 2 * x
].y
+
1916 mb
->bmv
[2 * y
* 4 + 2 * x
+ 1].y
+
1917 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
].y
+
1918 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
+ 1].y
;
1919 uvmv
.x
= (uvmv
.x
+ 2 + FF_SIGNBIT(uvmv
.x
)) >> 2;
1920 uvmv
.y
= (uvmv
.y
+ 2 + FF_SIGNBIT(uvmv
.y
)) >> 2;
1921 if (s
->profile
== 3) {
1925 vp8_mc_chroma(s
, td
, dst
[1] + 4 * y
* s
->uvlinesize
+ x
* 4,
1926 dst
[2] + 4 * y
* s
->uvlinesize
+ x
* 4, ref
,
1927 &uvmv
, 4 * x
+ x_off
, 4 * y
+ y_off
, 4, 4,
1928 width
, height
, s
->uvlinesize
,
1929 s
->put_pixels_tab
[2]);
1934 case VP8_SPLITMVMODE_16x8
:
1935 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1936 0, 0, 16, 8, width
, height
, &bmv
[0]);
1937 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1938 0, 8, 16, 8, width
, height
, &bmv
[1]);
1940 case VP8_SPLITMVMODE_8x16
:
1941 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1942 0, 0, 8, 16, width
, height
, &bmv
[0]);
1943 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1944 8, 0, 8, 16, width
, height
, &bmv
[1]);
1946 case VP8_SPLITMVMODE_8x8
:
1947 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1948 0, 0, 8, 8, width
, height
, &bmv
[0]);
1949 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1950 8, 0, 8, 8, width
, height
, &bmv
[1]);
1951 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1952 0, 8, 8, 8, width
, height
, &bmv
[2]);
1953 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1954 8, 8, 8, 8, width
, height
, &bmv
[3]);
1959 static av_always_inline
1960 void idct_mb(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3], VP8Macroblock
*mb
)
1964 if (mb
->mode
!= MODE_I4x4
) {
1965 uint8_t *y_dst
= dst
[0];
1966 for (y
= 0; y
< 4; y
++) {
1967 uint32_t nnz4
= AV_RL32(td
->non_zero_count_cache
[y
]);
1969 if (nnz4
& ~0x01010101) {
1970 for (x
= 0; x
< 4; x
++) {
1971 if ((uint8_t) nnz4
== 1)
1972 s
->vp8dsp
.vp8_idct_dc_add(y_dst
+ 4 * x
,
1975 else if ((uint8_t) nnz4
> 1)
1976 s
->vp8dsp
.vp8_idct_add(y_dst
+ 4 * x
,
1984 s
->vp8dsp
.vp8_idct_dc_add4y(y_dst
, td
->block
[y
], s
->linesize
);
1987 y_dst
+= 4 * s
->linesize
;
1991 for (ch
= 0; ch
< 2; ch
++) {
1992 uint32_t nnz4
= AV_RL32(td
->non_zero_count_cache
[4 + ch
]);
1994 uint8_t *ch_dst
= dst
[1 + ch
];
1995 if (nnz4
& ~0x01010101) {
1996 for (y
= 0; y
< 2; y
++) {
1997 for (x
= 0; x
< 2; x
++) {
1998 if ((uint8_t) nnz4
== 1)
1999 s
->vp8dsp
.vp8_idct_dc_add(ch_dst
+ 4 * x
,
2000 td
->block
[4 + ch
][(y
<< 1) + x
],
2002 else if ((uint8_t) nnz4
> 1)
2003 s
->vp8dsp
.vp8_idct_add(ch_dst
+ 4 * x
,
2004 td
->block
[4 + ch
][(y
<< 1) + x
],
2008 goto chroma_idct_end
;
2010 ch_dst
+= 4 * s
->uvlinesize
;
2013 s
->vp8dsp
.vp8_idct_dc_add4uv(ch_dst
, td
->block
[4 + ch
], s
->uvlinesize
);
2021 static av_always_inline
2022 void filter_level_for_mb(VP8Context
*s
, VP8Macroblock
*mb
,
2023 VP8FilterStrength
*f
, int is_vp7
)
2025 int interior_limit
, filter_level
;
2027 if (s
->segmentation
.enabled
) {
2028 filter_level
= s
->segmentation
.filter_level
[mb
->segment
];
2029 if (!s
->segmentation
.absolute_vals
)
2030 filter_level
+= s
->filter
.level
;
2032 filter_level
= s
->filter
.level
;
2034 if (s
->lf_delta
.enabled
) {
2035 filter_level
+= s
->lf_delta
.ref
[mb
->ref_frame
];
2036 filter_level
+= s
->lf_delta
.mode
[mb
->mode
];
2039 filter_level
= av_clip_uintp2(filter_level
, 6);
2041 interior_limit
= filter_level
;
2042 if (s
->filter
.sharpness
) {
2043 interior_limit
>>= (s
->filter
.sharpness
+ 3) >> 2;
2044 interior_limit
= FFMIN(interior_limit
, 9 - s
->filter
.sharpness
);
2046 interior_limit
= FFMAX(interior_limit
, 1);
2048 f
->filter_level
= filter_level
;
2049 f
->inner_limit
= interior_limit
;
2050 f
->inner_filter
= is_vp7
|| !mb
->skip
|| mb
->mode
== MODE_I4x4
||
2051 mb
->mode
== VP8_MVMODE_SPLIT
;
2054 static av_always_inline
2055 void filter_mb(VP8Context
*s
, uint8_t *dst
[3], VP8FilterStrength
*f
,
2056 int mb_x
, int mb_y
, int is_vp7
)
2058 int mbedge_lim
, bedge_lim_y
, bedge_lim_uv
, hev_thresh
;
2059 int filter_level
= f
->filter_level
;
2060 int inner_limit
= f
->inner_limit
;
2061 int inner_filter
= f
->inner_filter
;
2062 int linesize
= s
->linesize
;
2063 int uvlinesize
= s
->uvlinesize
;
2064 static const uint8_t hev_thresh_lut
[2][64] = {
2065 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2066 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2067 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2069 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2071 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2079 bedge_lim_y
= filter_level
;
2080 bedge_lim_uv
= filter_level
* 2;
2081 mbedge_lim
= filter_level
+ 2;
2084 bedge_lim_uv
= filter_level
* 2 + inner_limit
;
2085 mbedge_lim
= bedge_lim_y
+ 4;
2088 hev_thresh
= hev_thresh_lut
[s
->keyframe
][filter_level
];
2091 s
->vp8dsp
.vp8_h_loop_filter16y(dst
[0], linesize
,
2092 mbedge_lim
, inner_limit
, hev_thresh
);
2093 s
->vp8dsp
.vp8_h_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
2094 mbedge_lim
, inner_limit
, hev_thresh
);
2097 #define H_LOOP_FILTER_16Y_INNER(cond) \
2098 if (cond && inner_filter) { \
2099 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2100 bedge_lim_y, inner_limit, \
2102 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2103 bedge_lim_y, inner_limit, \
2105 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2106 bedge_lim_y, inner_limit, \
2108 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2109 uvlinesize, bedge_lim_uv, \
2110 inner_limit, hev_thresh); \
2113 H_LOOP_FILTER_16Y_INNER(!is_vp7
)
2116 s
->vp8dsp
.vp8_v_loop_filter16y(dst
[0], linesize
,
2117 mbedge_lim
, inner_limit
, hev_thresh
);
2118 s
->vp8dsp
.vp8_v_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
2119 mbedge_lim
, inner_limit
, hev_thresh
);
2123 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 4 * linesize
,
2124 linesize
, bedge_lim_y
,
2125 inner_limit
, hev_thresh
);
2126 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 8 * linesize
,
2127 linesize
, bedge_lim_y
,
2128 inner_limit
, hev_thresh
);
2129 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 12 * linesize
,
2130 linesize
, bedge_lim_y
,
2131 inner_limit
, hev_thresh
);
2132 s
->vp8dsp
.vp8_v_loop_filter8uv_inner(dst
[1] + 4 * uvlinesize
,
2133 dst
[2] + 4 * uvlinesize
,
2134 uvlinesize
, bedge_lim_uv
,
2135 inner_limit
, hev_thresh
);
2138 H_LOOP_FILTER_16Y_INNER(is_vp7
)
2141 static av_always_inline
2142 void filter_mb_simple(VP8Context
*s
, uint8_t *dst
, VP8FilterStrength
*f
,
2145 int mbedge_lim
, bedge_lim
;
2146 int filter_level
= f
->filter_level
;
2147 int inner_limit
= f
->inner_limit
;
2148 int inner_filter
= f
->inner_filter
;
2149 int linesize
= s
->linesize
;
2154 bedge_lim
= 2 * filter_level
+ inner_limit
;
2155 mbedge_lim
= bedge_lim
+ 4;
2158 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
, linesize
, mbedge_lim
);
2160 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 4, linesize
, bedge_lim
);
2161 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 8, linesize
, bedge_lim
);
2162 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 12, linesize
, bedge_lim
);
2166 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
, linesize
, mbedge_lim
);
2168 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 4 * linesize
, linesize
, bedge_lim
);
2169 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 8 * linesize
, linesize
, bedge_lim
);
2170 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 12 * linesize
, linesize
, bedge_lim
);
2174 #define MARGIN (16 << 2)
2175 static av_always_inline
2176 void vp78_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*curframe
,
2177 VP8Frame
*prev_frame
, int is_vp7
)
2179 VP8Context
*s
= avctx
->priv_data
;
2182 s
->mv_min
.y
= -MARGIN
;
2183 s
->mv_max
.y
= ((s
->mb_height
- 1) << 6) + MARGIN
;
2184 for (mb_y
= 0; mb_y
< s
->mb_height
; mb_y
++) {
2185 VP8Macroblock
*mb
= s
->macroblocks_base
+
2186 ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2187 int mb_xy
= mb_y
* s
->mb_width
;
2189 AV_WN32A(s
->intra4x4_pred_mode_left
, DC_PRED
* 0x01010101);
2191 s
->mv_min
.x
= -MARGIN
;
2192 s
->mv_max
.x
= ((s
->mb_width
- 1) << 6) + MARGIN
;
2193 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb_xy
++, mb
++) {
2195 AV_WN32A((mb
- s
->mb_width
- 1)->intra4x4_pred_mode_top
,
2196 DC_PRED
* 0x01010101);
2197 decode_mb_mode(s
, mb
, mb_x
, mb_y
, curframe
->seg_map
->data
+ mb_xy
,
2198 prev_frame
&& prev_frame
->seg_map
?
2199 prev_frame
->seg_map
->data
+ mb_xy
: NULL
, 1, is_vp7
);
2208 static void vp7_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*cur_frame
,
2209 VP8Frame
*prev_frame
)
2211 vp78_decode_mv_mb_modes(avctx
, cur_frame
, prev_frame
, IS_VP7
);
2214 static void vp8_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*cur_frame
,
2215 VP8Frame
*prev_frame
)
2217 vp78_decode_mv_mb_modes(avctx
, cur_frame
, prev_frame
, IS_VP8
);
2221 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2223 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2224 if (otd->thread_mb_pos < tmp) { \
2225 pthread_mutex_lock(&otd->lock); \
2226 td->wait_mb_pos = tmp; \
2228 if (otd->thread_mb_pos >= tmp) \
2230 pthread_cond_wait(&otd->cond, &otd->lock); \
2232 td->wait_mb_pos = INT_MAX; \
2233 pthread_mutex_unlock(&otd->lock); \
2237 #define update_pos(td, mb_y, mb_x) \
2239 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2240 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2242 int is_null = !next_td || !prev_td; \
2243 int pos_check = (is_null) ? 1 \
2244 : (next_td != td && \
2245 pos >= next_td->wait_mb_pos) || \
2247 pos >= prev_td->wait_mb_pos); \
2248 td->thread_mb_pos = pos; \
2249 if (sliced_threading && pos_check) { \
2250 pthread_mutex_lock(&td->lock); \
2251 pthread_cond_broadcast(&td->cond); \
2252 pthread_mutex_unlock(&td->lock); \
2256 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2257 #define update_pos(td, mb_y, mb_x)
2260 static av_always_inline
void decode_mb_row_no_filter(AVCodecContext
*avctx
, void *tdata
,
2261 int jobnr
, int threadnr
, int is_vp7
)
2263 VP8Context
*s
= avctx
->priv_data
;
2264 VP8ThreadData
*prev_td
, *next_td
, *td
= &s
->thread_data
[threadnr
];
2265 int mb_y
= td
->thread_mb_pos
>> 16;
2266 int mb_x
, mb_xy
= mb_y
* s
->mb_width
;
2267 int num_jobs
= s
->num_jobs
;
2268 VP8Frame
*curframe
= s
->curframe
, *prev_frame
= s
->prev_frame
;
2269 VP56RangeCoder
*c
= &s
->coeff_partition
[mb_y
& (s
->num_coeff_partitions
- 1)];
2272 curframe
->tf
.f
->data
[0] + 16 * mb_y
* s
->linesize
,
2273 curframe
->tf
.f
->data
[1] + 8 * mb_y
* s
->uvlinesize
,
2274 curframe
->tf
.f
->data
[2] + 8 * mb_y
* s
->uvlinesize
2279 prev_td
= &s
->thread_data
[(jobnr
+ num_jobs
- 1) % num_jobs
];
2280 if (mb_y
== s
->mb_height
- 1)
2283 next_td
= &s
->thread_data
[(jobnr
+ 1) % num_jobs
];
2284 if (s
->mb_layout
== 1)
2285 mb
= s
->macroblocks_base
+ ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2287 // Make sure the previous frame has read its segmentation map,
2288 // if we re-use the same map.
2289 if (prev_frame
&& s
->segmentation
.enabled
&&
2290 !s
->segmentation
.update_map
)
2291 ff_thread_await_progress(&prev_frame
->tf
, mb_y
, 0);
2292 mb
= s
->macroblocks
+ (s
->mb_height
- mb_y
- 1) * 2;
2293 memset(mb
- 1, 0, sizeof(*mb
)); // zero left macroblock
2294 AV_WN32A(s
->intra4x4_pred_mode_left
, DC_PRED
* 0x01010101);
2297 if (!is_vp7
|| mb_y
== 0)
2298 memset(td
->left_nnz
, 0, sizeof(td
->left_nnz
));
2300 s
->mv_min
.x
= -MARGIN
;
2301 s
->mv_max
.x
= ((s
->mb_width
- 1) << 6) + MARGIN
;
2303 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb_xy
++, mb
++) {
2304 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2305 if (prev_td
!= td
) {
2306 if (threadnr
!= 0) {
2307 check_thread_pos(td
, prev_td
,
2308 mb_x
+ (is_vp7
? 2 : 1),
2309 mb_y
- (is_vp7
? 2 : 1));
2311 check_thread_pos(td
, prev_td
,
2312 mb_x
+ (is_vp7
? 2 : 1) + s
->mb_width
+ 3,
2313 mb_y
- (is_vp7
? 2 : 1));
2317 s
->vdsp
.prefetch(dst
[0] + (mb_x
& 3) * 4 * s
->linesize
+ 64,
2319 s
->vdsp
.prefetch(dst
[1] + (mb_x
& 7) * s
->uvlinesize
+ 64,
2320 dst
[2] - dst
[1], 2);
2323 decode_mb_mode(s
, mb
, mb_x
, mb_y
, curframe
->seg_map
->data
+ mb_xy
,
2324 prev_frame
&& prev_frame
->seg_map
?
2325 prev_frame
->seg_map
->data
+ mb_xy
: NULL
, 0, is_vp7
);
2327 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_PREVIOUS
);
2330 decode_mb_coeffs(s
, td
, c
, mb
, s
->top_nnz
[mb_x
], td
->left_nnz
, is_vp7
);
2332 if (mb
->mode
<= MODE_I4x4
)
2333 intra_predict(s
, td
, dst
, mb
, mb_x
, mb_y
, is_vp7
);
2335 inter_predict(s
, td
, dst
, mb
, mb_x
, mb_y
);
2337 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN
);
2340 idct_mb(s
, td
, dst
, mb
);
2342 AV_ZERO64(td
->left_nnz
);
2343 AV_WN64(s
->top_nnz
[mb_x
], 0); // array of 9, so unaligned
2345 /* Reset DC block predictors if they would exist
2346 * if the mb had coefficients */
2347 if (mb
->mode
!= MODE_I4x4
&& mb
->mode
!= VP8_MVMODE_SPLIT
) {
2348 td
->left_nnz
[8] = 0;
2349 s
->top_nnz
[mb_x
][8] = 0;
2353 if (s
->deblock_filter
)
2354 filter_level_for_mb(s
, mb
, &td
->filter_strength
[mb_x
], is_vp7
);
2356 if (s
->deblock_filter
&& num_jobs
!= 1 && threadnr
== num_jobs
- 1) {
2357 if (s
->filter
.simple
)
2358 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2359 NULL
, NULL
, s
->linesize
, 0, 1);
2361 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2362 dst
[1], dst
[2], s
->linesize
, s
->uvlinesize
, 0);
2365 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN2
);
2373 if (mb_x
== s
->mb_width
+ 1) {
2374 update_pos(td
, mb_y
, s
->mb_width
+ 3);
2376 update_pos(td
, mb_y
, mb_x
);
2381 static void vp7_decode_mb_row_no_filter(AVCodecContext
*avctx
, void *tdata
,
2382 int jobnr
, int threadnr
)
2384 decode_mb_row_no_filter(avctx
, tdata
, jobnr
, threadnr
, 1);
2387 static void vp8_decode_mb_row_no_filter(AVCodecContext
*avctx
, void *tdata
,
2388 int jobnr
, int threadnr
)
2390 decode_mb_row_no_filter(avctx
, tdata
, jobnr
, threadnr
, 0);
2393 static av_always_inline
void filter_mb_row(AVCodecContext
*avctx
, void *tdata
,
2394 int jobnr
, int threadnr
, int is_vp7
)
2396 VP8Context
*s
= avctx
->priv_data
;
2397 VP8ThreadData
*td
= &s
->thread_data
[threadnr
];
2398 int mb_x
, mb_y
= td
->thread_mb_pos
>> 16, num_jobs
= s
->num_jobs
;
2399 AVFrame
*curframe
= s
->curframe
->tf
.f
;
2401 VP8ThreadData
*prev_td
, *next_td
;
2403 curframe
->data
[0] + 16 * mb_y
* s
->linesize
,
2404 curframe
->data
[1] + 8 * mb_y
* s
->uvlinesize
,
2405 curframe
->data
[2] + 8 * mb_y
* s
->uvlinesize
2408 if (s
->mb_layout
== 1)
2409 mb
= s
->macroblocks_base
+ ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2411 mb
= s
->macroblocks
+ (s
->mb_height
- mb_y
- 1) * 2;
2416 prev_td
= &s
->thread_data
[(jobnr
+ num_jobs
- 1) % num_jobs
];
2417 if (mb_y
== s
->mb_height
- 1)
2420 next_td
= &s
->thread_data
[(jobnr
+ 1) % num_jobs
];
2422 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb
++) {
2423 VP8FilterStrength
*f
= &td
->filter_strength
[mb_x
];
2425 check_thread_pos(td
, prev_td
,
2426 (mb_x
+ 1) + (s
->mb_width
+ 3), mb_y
- 1);
2428 if (next_td
!= &s
->thread_data
[0])
2429 check_thread_pos(td
, next_td
, mb_x
+ 1, mb_y
+ 1);
2431 if (num_jobs
== 1) {
2432 if (s
->filter
.simple
)
2433 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2434 NULL
, NULL
, s
->linesize
, 0, 1);
2436 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2437 dst
[1], dst
[2], s
->linesize
, s
->uvlinesize
, 0);
2440 if (s
->filter
.simple
)
2441 filter_mb_simple(s
, dst
[0], f
, mb_x
, mb_y
);
2443 filter_mb(s
, dst
, f
, mb_x
, mb_y
, is_vp7
);
2448 update_pos(td
, mb_y
, (s
->mb_width
+ 3) + mb_x
);
2452 static void vp7_filter_mb_row(AVCodecContext
*avctx
, void *tdata
,
2453 int jobnr
, int threadnr
)
2455 filter_mb_row(avctx
, tdata
, jobnr
, threadnr
, 1);
2458 static void vp8_filter_mb_row(AVCodecContext
*avctx
, void *tdata
,
2459 int jobnr
, int threadnr
)
2461 filter_mb_row(avctx
, tdata
, jobnr
, threadnr
, 0);
2464 static av_always_inline
2465 int vp78_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
, int jobnr
,
2466 int threadnr
, int is_vp7
)
2468 VP8Context
*s
= avctx
->priv_data
;
2469 VP8ThreadData
*td
= &s
->thread_data
[jobnr
];
2470 VP8ThreadData
*next_td
= NULL
, *prev_td
= NULL
;
2471 VP8Frame
*curframe
= s
->curframe
;
2472 int mb_y
, num_jobs
= s
->num_jobs
;
2474 td
->thread_nr
= threadnr
;
2475 for (mb_y
= jobnr
; mb_y
< s
->mb_height
; mb_y
+= num_jobs
) {
2476 if (mb_y
>= s
->mb_height
)
2478 td
->thread_mb_pos
= mb_y
<< 16;
2479 s
->decode_mb_row_no_filter(avctx
, tdata
, jobnr
, threadnr
);
2480 if (s
->deblock_filter
)
2481 s
->filter_mb_row(avctx
, tdata
, jobnr
, threadnr
);
2482 update_pos(td
, mb_y
, INT_MAX
& 0xFFFF);
2487 if (avctx
->active_thread_type
== FF_THREAD_FRAME
)
2488 ff_thread_report_progress(&curframe
->tf
, mb_y
, 0);
2494 static int vp7_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
,
2495 int jobnr
, int threadnr
)
2497 return vp78_decode_mb_row_sliced(avctx
, tdata
, jobnr
, threadnr
, IS_VP7
);
2500 static int vp8_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
,
2501 int jobnr
, int threadnr
)
2503 return vp78_decode_mb_row_sliced(avctx
, tdata
, jobnr
, threadnr
, IS_VP8
);
2507 static av_always_inline
2508 int vp78_decode_frame(AVCodecContext
*avctx
, void *data
, int *got_frame
,
2509 AVPacket
*avpkt
, int is_vp7
)
2511 VP8Context
*s
= avctx
->priv_data
;
2512 int ret
, i
, referenced
, num_jobs
;
2513 enum AVDiscard skip_thresh
;
2514 VP8Frame
*av_uninit(curframe
), *prev_frame
;
2517 ret
= vp7_decode_frame_header(s
, avpkt
->data
, avpkt
->size
);
2519 ret
= vp8_decode_frame_header(s
, avpkt
->data
, avpkt
->size
);
2524 prev_frame
= s
->framep
[VP56_FRAME_CURRENT
];
2526 referenced
= s
->update_last
|| s
->update_golden
== VP56_FRAME_CURRENT
||
2527 s
->update_altref
== VP56_FRAME_CURRENT
;
2529 skip_thresh
= !referenced
? AVDISCARD_NONREF
2530 : !s
->keyframe
? AVDISCARD_NONKEY
2533 if (avctx
->skip_frame
>= skip_thresh
) {
2535 memcpy(&s
->next_framep
[0], &s
->framep
[0], sizeof(s
->framep
[0]) * 4);
2538 s
->deblock_filter
= s
->filter
.level
&& avctx
->skip_loop_filter
< skip_thresh
;
2540 // release no longer referenced frames
2541 for (i
= 0; i
< 5; i
++)
2542 if (s
->frames
[i
].tf
.f
->data
[0] &&
2543 &s
->frames
[i
] != prev_frame
&&
2544 &s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
2545 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
2546 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
])
2547 vp8_release_frame(s
, &s
->frames
[i
]);
2549 curframe
= s
->framep
[VP56_FRAME_CURRENT
] = vp8_find_free_buffer(s
);
2552 avctx
->colorspace
= AVCOL_SPC_BT470BG
;
2554 avctx
->color_range
= AVCOL_RANGE_JPEG
;
2556 avctx
->color_range
= AVCOL_RANGE_MPEG
;
2558 /* Given that arithmetic probabilities are updated every frame, it's quite
2559 * likely that the values we have on a random interframe are complete
2560 * junk if we didn't start decode on a keyframe. So just don't display
2561 * anything rather than junk. */
2562 if (!s
->keyframe
&& (!s
->framep
[VP56_FRAME_PREVIOUS
] ||
2563 !s
->framep
[VP56_FRAME_GOLDEN
] ||
2564 !s
->framep
[VP56_FRAME_GOLDEN2
])) {
2565 av_log(avctx
, AV_LOG_WARNING
,
2566 "Discarding interframe without a prior keyframe!\n");
2567 ret
= AVERROR_INVALIDDATA
;
2571 curframe
->tf
.f
->key_frame
= s
->keyframe
;
2572 curframe
->tf
.f
->pict_type
= s
->keyframe
? AV_PICTURE_TYPE_I
2573 : AV_PICTURE_TYPE_P
;
2574 if ((ret
= vp8_alloc_frame(s
, curframe
, referenced
)) < 0)
2577 // check if golden and altref are swapped
2578 if (s
->update_altref
!= VP56_FRAME_NONE
)
2579 s
->next_framep
[VP56_FRAME_GOLDEN2
] = s
->framep
[s
->update_altref
];
2581 s
->next_framep
[VP56_FRAME_GOLDEN2
] = s
->framep
[VP56_FRAME_GOLDEN2
];
2583 if (s
->update_golden
!= VP56_FRAME_NONE
)
2584 s
->next_framep
[VP56_FRAME_GOLDEN
] = s
->framep
[s
->update_golden
];
2586 s
->next_framep
[VP56_FRAME_GOLDEN
] = s
->framep
[VP56_FRAME_GOLDEN
];
2589 s
->next_framep
[VP56_FRAME_PREVIOUS
] = curframe
;
2591 s
->next_framep
[VP56_FRAME_PREVIOUS
] = s
->framep
[VP56_FRAME_PREVIOUS
];
2593 s
->next_framep
[VP56_FRAME_CURRENT
] = curframe
;
2595 if (avctx
->codec
->update_thread_context
)
2596 ff_thread_finish_setup(avctx
);
2598 s
->linesize
= curframe
->tf
.f
->linesize
[0];
2599 s
->uvlinesize
= curframe
->tf
.f
->linesize
[1];
2601 memset(s
->top_nnz
, 0, s
->mb_width
* sizeof(*s
->top_nnz
));
2602 /* Zero macroblock structures for top/top-left prediction
2603 * from outside the frame. */
2605 memset(s
->macroblocks
+ s
->mb_height
* 2 - 1, 0,
2606 (s
->mb_width
+ 1) * sizeof(*s
->macroblocks
));
2607 if (!s
->mb_layout
&& s
->keyframe
)
2608 memset(s
->intra4x4_pred_mode_top
, DC_PRED
, s
->mb_width
* 4);
2610 memset(s
->ref_count
, 0, sizeof(s
->ref_count
));
2612 if (s
->mb_layout
== 1) {
2613 // Make sure the previous frame has read its segmentation map,
2614 // if we re-use the same map.
2615 if (prev_frame
&& s
->segmentation
.enabled
&&
2616 !s
->segmentation
.update_map
)
2617 ff_thread_await_progress(&prev_frame
->tf
, 1, 0);
2619 vp7_decode_mv_mb_modes(avctx
, curframe
, prev_frame
);
2621 vp8_decode_mv_mb_modes(avctx
, curframe
, prev_frame
);
2624 if (avctx
->active_thread_type
== FF_THREAD_FRAME
)
2627 num_jobs
= FFMIN(s
->num_coeff_partitions
, avctx
->thread_count
);
2628 s
->num_jobs
= num_jobs
;
2629 s
->curframe
= curframe
;
2630 s
->prev_frame
= prev_frame
;
2631 s
->mv_min
.y
= -MARGIN
;
2632 s
->mv_max
.y
= ((s
->mb_height
- 1) << 6) + MARGIN
;
2633 for (i
= 0; i
< MAX_THREADS
; i
++) {
2634 s
->thread_data
[i
].thread_mb_pos
= 0;
2635 s
->thread_data
[i
].wait_mb_pos
= INT_MAX
;
2638 avctx
->execute2(avctx
, vp7_decode_mb_row_sliced
, s
->thread_data
, NULL
,
2641 avctx
->execute2(avctx
, vp8_decode_mb_row_sliced
, s
->thread_data
, NULL
,
2644 ff_thread_report_progress(&curframe
->tf
, INT_MAX
, 0);
2645 memcpy(&s
->framep
[0], &s
->next_framep
[0], sizeof(s
->framep
[0]) * 4);
2648 // if future frames don't use the updated probabilities,
2649 // reset them to the values we saved
2650 if (!s
->update_probabilities
)
2651 s
->prob
[0] = s
->prob
[1];
2653 if (!s
->invisible
) {
2654 if ((ret
= av_frame_ref(data
, curframe
->tf
.f
)) < 0)
2661 memcpy(&s
->next_framep
[0], &s
->framep
[0], sizeof(s
->framep
[0]) * 4);
2665 int ff_vp8_decode_frame(AVCodecContext
*avctx
, void *data
, int *got_frame
,
2668 return vp78_decode_frame(avctx
, data
, got_frame
, avpkt
, IS_VP8
);
#if CONFIG_VP7_DECODER
/* VP7 decode entry point. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2679 av_cold
int ff_vp8_decode_free(AVCodecContext
*avctx
)
2681 VP8Context
*s
= avctx
->priv_data
;
2684 vp8_decode_flush_impl(avctx
, 1);
2685 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++)
2686 av_frame_free(&s
->frames
[i
].tf
.f
);
2691 static av_cold
int vp8_init_frames(VP8Context
*s
)
2694 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++) {
2695 s
->frames
[i
].tf
.f
= av_frame_alloc();
2696 if (!s
->frames
[i
].tf
.f
)
2697 return AVERROR(ENOMEM
);
2702 static av_always_inline
2703 int vp78_decode_init(AVCodecContext
*avctx
, int is_vp7
)
2705 VP8Context
*s
= avctx
->priv_data
;
2709 s
->vp7
= avctx
->codec
->id
== AV_CODEC_ID_VP7
;
2710 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
2711 avctx
->internal
->allocate_progress
= 1;
2713 ff_videodsp_init(&s
->vdsp
, 8);
2715 ff_vp78dsp_init(&s
->vp8dsp
);
2716 if (CONFIG_VP7_DECODER
&& is_vp7
) {
2717 ff_h264_pred_init(&s
->hpc
, AV_CODEC_ID_VP7
, 8, 1);
2718 ff_vp7dsp_init(&s
->vp8dsp
);
2719 s
->decode_mb_row_no_filter
= vp7_decode_mb_row_no_filter
;
2720 s
->filter_mb_row
= vp7_filter_mb_row
;
2721 } else if (CONFIG_VP8_DECODER
&& !is_vp7
) {
2722 ff_h264_pred_init(&s
->hpc
, AV_CODEC_ID_VP8
, 8, 1);
2723 ff_vp8dsp_init(&s
->vp8dsp
);
2724 s
->decode_mb_row_no_filter
= vp8_decode_mb_row_no_filter
;
2725 s
->filter_mb_row
= vp8_filter_mb_row
;
2728 /* does not change for VP8 */
2729 memcpy(s
->prob
[0].scan
, zigzag_scan
, sizeof(s
->prob
[0].scan
));
2731 if ((ret
= vp8_init_frames(s
)) < 0) {
2732 ff_vp8_decode_free(avctx
);
#if CONFIG_VP7_DECODER
/* VP7 codec init callback. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2746 av_cold
int ff_vp8_decode_init(AVCodecContext
*avctx
)
2748 return vp78_decode_init(avctx
, IS_VP8
);
2751 #if CONFIG_VP8_DECODER
2752 static av_cold
int vp8_decode_init_thread_copy(AVCodecContext
*avctx
)
2754 VP8Context
*s
= avctx
->priv_data
;
2759 if ((ret
= vp8_init_frames(s
)) < 0) {
2760 ff_vp8_decode_free(avctx
);
2767 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2769 static int vp8_decode_update_thread_context(AVCodecContext
*dst
,
2770 const AVCodecContext
*src
)
2772 VP8Context
*s
= dst
->priv_data
, *s_src
= src
->priv_data
;
2775 if (s
->macroblocks_base
&&
2776 (s_src
->mb_width
!= s
->mb_width
|| s_src
->mb_height
!= s
->mb_height
)) {
2778 s
->mb_width
= s_src
->mb_width
;
2779 s
->mb_height
= s_src
->mb_height
;
2782 s
->prob
[0] = s_src
->prob
[!s_src
->update_probabilities
];
2783 s
->segmentation
= s_src
->segmentation
;
2784 s
->lf_delta
= s_src
->lf_delta
;
2785 memcpy(s
->sign_bias
, s_src
->sign_bias
, sizeof(s
->sign_bias
));
2787 for (i
= 0; i
< FF_ARRAY_ELEMS(s_src
->frames
); i
++) {
2788 if (s_src
->frames
[i
].tf
.f
->data
[0]) {
2789 int ret
= vp8_ref_frame(s
, &s
->frames
[i
], &s_src
->frames
[i
]);
2795 s
->framep
[0] = REBASE(s_src
->next_framep
[0]);
2796 s
->framep
[1] = REBASE(s_src
->next_framep
[1]);
2797 s
->framep
[2] = REBASE(s_src
->next_framep
[2]);
2798 s
->framep
[3] = REBASE(s_src
->next_framep
[3]);
2802 #endif /* CONFIG_VP8_DECODER */
#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
                             CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
/* Fixed: the closing guard comment previously said CONFIG_VP7_DECODER even
 * though this block is guarded by CONFIG_VP8_DECODER. */
#endif /* CONFIG_VP8_DECODER */