/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

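/* Select between the VP7 and VP8 variant of a function: when both decoders
 * are compiled in, the vp7 flag is tested at run time, e.g.
 * VPX(1, decode_frame_header) evaluates to vp7_decode_frame_header;
 * with only one decoder built, the macro collapses at compile time. */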
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}

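/* update_dimensions() below also picks between two macroblock-metadata
 * layouts: with frame threading (or a single thread) only a strip of
 * mb_width + 2 * mb_height + 1 entries is kept and reused as decoding
 * advances, while slice threading allocates the full
 * (mb_width + 2) x (mb_height + 2) grid, including a one-macroblock
 * guard band, because several rows are decoded concurrently. */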
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width  || height != s->avctx->height ||
        (((width  + 15) / 16 != s->mb_width ||
          (height + 15) / 16 != s->mb_height) && s->macroblocks_base)) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || (avctx->active_thread_type == FF_THREAD_SLICE &&
                              FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

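/* Loop-filter deltas are coded in sign-magnitude form: an update flag,
 * a 6-bit magnitude, then a sign bit. */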
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

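/* The token data is split into up to 8 independently decodable partitions;
 * the first count-1 sizes are stored as 24-bit little-endian values in
 * front of the data, and the last partition takes whatever remains of the
 * buffer. */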
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

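/* VP8 transmits one 7-bit base AC quantizer index plus signed 4-bit
 * per-plane deltas (with optional per-segment overrides); every resulting
 * index is clipped to 0..127 before the table lookup. For example,
 * yac_qi = 60 with ydc_delta = -2 gives the luma DC factor
 * vp8_dc_qlookup[58] and the luma AC factor vp8_ac_qlookup[60]. */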
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581 >> 16 is equivalent to 155 / 100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

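/* Token probabilities are updated per coefficient band: one updated value
 * from the bitstream is written to every coefficient position belonging to
 * that band, as listed in vp8_coeff_band_indexes. */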
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

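/* VP7 fading compensation: each pixel becomes
 * clip(y + y * beta / 256 + alpha), with alpha and beta signed 8-bit
 * values from the bitstream, i.e. a brightness offset plus a contrast
 * slope. */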
static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
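/* Large components (|x| > 7) are coded as raw bits: the three low bits
 * first, then the remaining bits from the top bit down to bit 4. Bit 3 is
 * implicit when no higher bit is set (values 0..7 would have used the
 * small tree instead), which is what the final "x += 8" fixup handles. */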
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num              = vp8_mbsplit_count[part_idx];
    mbsplits_cur     = vp8_mbsplits[part_idx];
    firstidx         = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to the
 * right edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

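/* VP7 motion-vector prediction visits a fixed, scored list of neighbouring
 * candidate positions (vp7_mv_pred); the scores of candidates yielding the
 * same vector accumulate in cnt[] and select the probabilities used for
 * the zero/nearest/near mode decisions below. */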
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y  += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv     = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv     = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }
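/* The SWAR negate above flips both packed 16-bit MV components in one
 * 32-bit operation: after ~mv, adding 0x00010001 completes the per-half
 * two's complement, and masking out / XORing back the sign bits keeps the
 * +1 carry from spilling from the low half into the high one.  E.g.
 * (x, y) = (-2, 1) packs to 0x0001fffe and becomes (2, -1) = 0xffff0002. */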

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char *const vp7_feature_name[] = { "q-index",
                                                    "lf-delta",
                                                    "partial-golden-update",
                                                    "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
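/* Note the goto structure below: in VP8 a zero coefficient is never
 * followed directly by an end-of-block token, so after DCT_0 the EOB check
 * is skipped (skip_eob); VP7 makes no such guarantee and re-enters at
 * restart, which checks EOB again. */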
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

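/* VP7 inter DC prediction: pred[0] holds a running DC value and pred[1]
 * its repeat count, kept per reference frame; once the same DC has been
 * seen more than three times in a row, subsequent DC coefficients are
 * coded as a delta against pred[0]. */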
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
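                /* copy_dst is a 5-row scratch block with a stride of 8,
                 * used when the prediction sources need patching: byte 3
                 * holds the top-left sample, bytes 4..7 the top row, bytes
                 * 11/19/27/35 the left column, and the predicted 4x4 block
                 * itself lands at offset 12. */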

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
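/* Example: a half-pel position (mx = 4) selects the six-tap filter, which
 * needs 2 extra pixels to the left of the block and 3 to the right, i.e.
 * 5 extra in total: subpel_idx[0][4] = 2, subpel_idx[1][4] = 5,
 * subpel_idx[2][4] = 3. */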
1688
1689/**
1690 * luma MC function
1691 *
1692 * @param s VP8 decoding context
1693 * @param dst target buffer for block data at block position
1694 * @param ref reference picture buffer at origin (0, 0)
1695 * @param mv motion vector (relative to block position) to get pixel data from
1696 * @param x_off horizontal position of block from origin (0, 0)
1697 * @param y_off vertical position of block from origin (0, 0)
1698 * @param block_w width of block (16, 8 or 4)
1699 * @param block_h height of block (always same as block_w)
1700 * @param width width of src/dst plane data
1701 * @param height height of src/dst plane data
1702 * @param linesize size of a single line of plane data, including padding
1703 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1704 */
1705static av_always_inline
1706void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1707 ThreadFrame *ref, const VP56mv *mv,
1708 int x_off, int y_off, int block_w, int block_h,
1709 int width, int height, ptrdiff_t linesize,
1710 vp8_mc_func mc_func[3][3])
1711{
1712 uint8_t *src = ref->f->data[0];
1713
1714 if (AV_RN32A(mv)) {
1715 int src_linesize = linesize;
1716
1717 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1718 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1719
1720 x_off += mv->x >> 2;
1721 y_off += mv->y >> 2;
1722
1723 // edge emulation
1724 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1725 src += y_off * linesize + x_off;
1726 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1727 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1728 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1729 src - my_idx * linesize - mx_idx,
1730 EDGE_EMU_LINESIZE, linesize,
1731 block_w + subpel_idx[1][mx],
1732 block_h + subpel_idx[1][my],
1733 x_off - mx_idx, y_off - my_idx,
1734 width, height);
1735 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1736 src_linesize = EDGE_EMU_LINESIZE;
1737 }
1738 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1739 } else {
1740 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1741 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1742 linesize, block_h, 0, 0);
1743 }
1744}
1745
1746/**
1747 * chroma MC function
1748 *
1749 * @param s VP8 decoding context
1750 * @param dst1 target buffer for block data at block position (U plane)
1751 * @param dst2 target buffer for block data at block position (V plane)
1752 * @param ref reference picture buffer at origin (0, 0)
1753 * @param mv motion vector (relative to block position) to get pixel data from
1754 * @param x_off horizontal position of block from origin (0, 0)
1755 * @param y_off vertical position of block from origin (0, 0)
1756 * @param block_w width of block (16, 8 or 4)
1757 * @param block_h height of block (always same as block_w)
1758 * @param width width of src/dst plane data
1759 * @param height height of src/dst plane data
1760 * @param linesize size of a single line of plane data, including padding
1761 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1762 */
1763static av_always_inline
1764void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1765 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1766 int x_off, int y_off, int block_w, int block_h,
1767 int width, int height, ptrdiff_t linesize,
1768 vp8_mc_func mc_func[3][3])
1769{
1770 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1771
1772 if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        // wait for the reference rows this block (plus filter margin) reads from
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);

        // edge emulation
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
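    /* Profile 3 clears the eighth-pel fraction, i.e. chroma is predicted
     * at full-pel positions only. Below, offsets and dimensions are halved
     * for the 4:2:0 chroma planes. */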
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
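        /* i.e. only if this ref was used more than roughly once per 32
         * macroblocks decoded so far (mb_xy counts MBs in raster order). */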
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y * 4 + 2 * x          ].x +
                         mb->bmv[2 * y * 4 + 2 * x + 1      ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y * 4 + 2 * x          ].y +
                         mb->bmv[2 * y * 4 + 2 * x + 1      ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
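                /* Rounded average of the four covered luma MVs, symmetric
                 * about zero: the sign term subtracts 1 for negative sums,
                 * e.g. a sum of -6 gives (-6 + 2 - 1) >> 2 = -2,
                 * mirroring +6 -> +2. */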
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
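            /* Each byte of nnz4 is 1 + the index of that block's last
             * nonzero coefficient (0 = all zero), so nnz4 & ~0x01010101 is
             * nonzero iff some block has a nonzero AC coefficient and needs
             * the full IDCT; otherwise the cheap DC-only add suffices. */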
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}

static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
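    /* Maps the clipped filter level (0..63) to the high-edge-variance
     * threshold, indexed by s->keyframe: keyframes (row 1) use lower
     * thresholds than interframes (row 0), matching the per-frame-type
     * derivation in the spec. */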

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim = filter_level + 2;
    } else {
        bedge_lim_y =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                        \
    if (cond && inner_filter) {                                              \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,         \
                                             uvlinesize, bedge_lim_uv,       \
                                             inner_limit, hev_thresh);       \
    }

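    /* The inner filters across vertical edges run here for VP8, but only
     * after the horizontal-edge passes below for VP7; the two guarded
     * invocations of the macro encode that ordering difference. */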
    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

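/* MVs may point at most MARGIN quarter-pel units (16 pixels) outside the
 * frame; mv_min/mv_max hold the clamp bounds relative to the current
 * macroblock and slide by 64 (one 16-pixel MB) per iteration. */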
#define MARGIN (16 << 2)
static av_always_inline
void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                             VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}

#if HAVE_THREADS
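/* Sliced-threading synchronization: each job publishes its decode position
 * packed as (mb_y << 16) | mb_x, so positions compare as plain integers
 * (e.g. row 3, column 5 -> 0x00030005). check_thread_pos blocks until the
 * other job has reached the given position; update_pos publishes our own
 * position and wakes any job that may be waiting on it. */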
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (otd->thread_mb_pos < tmp) {                                       \
            pthread_mutex_lock(&otd->lock);                                   \
            td->wait_mb_pos = tmp;                                            \
            do {                                                              \
                if (otd->thread_mb_pos >= tmp)                                \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            td->wait_mb_pos = INT_MAX;                                        \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);                             \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null = !next_td || !prev_td;                                   \
        int pos_check = (is_null) ? 1                                         \
                                  : (next_td != td &&                         \
                                     pos >= next_td->wait_mb_pos) ||          \
                                    (prev_td != td &&                         \
                                     pos >= prev_td->wait_mb_pos);            \
        td->thread_mb_pos = pos;                                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif

static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                     int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
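    /* Coefficient partitions are assigned to MB rows round-robin; the
     * partition count is a power of two, so the & works as a modulo. */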
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
    };
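    /* With sliced threading, job j decodes rows j, j + num_jobs, ...;
     * prev_td/next_td are the jobs that own the rows directly above and
     * below, which check_thread_pos()/update_pos() keep in lockstep. */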
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for the previous thread to read mb_x+2 and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would have existed
             * had this macroblock contained coefficients. */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] + 8 * mb_y * s->uvlinesize,
        curframe->data[2] + 8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;

    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        td->thread_mb_pos = mb_y << 16;
        s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}

static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                : !s->keyframe ? AVDISCARD_NONKEY
                               : AVDISCARD_ALL;
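    /* A frame nothing references may be dropped at AVDISCARD_NONREF, an
     * interframe at AVDISCARD_NONKEY, a keyframe only at AVDISCARD_ALL. */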

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decoding from a keyframe. So just don't
     * display anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    s->linesize = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
               (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        if (is_vp7)
            vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
        else
            vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs = num_jobs;
    s->curframe = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    if (is_vp7)
        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);
    else
        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row = vp8_filter_mb_row;
    }

    /* the scan order does not change for VP8; VP7 can update it per frame */
    memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static av_cold int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

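/* Frame pointers in the source context point into s_src->frames[]; rebase
 * them to the entry with the same index in our own frames[] array. */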
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
                             CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */