/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

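/* Dispatch to the VP7 or VP8 variant of a static function; resolved at
 * compile time when only one of the two decoders is configured in. */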
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

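    /* With slice threading (or VP7) macroblock state is kept for the whole
     * frame so other rows remain reachable; with frame threading or a single
     * thread only a strip is needed, and the top-row prediction modes live
     * in the separate intra4x4_pred_mode_top array. */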
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}


static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

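    /* The sizes of the first num_coeff_partitions - 1 partitions are stored
     * as 24-bit little-endian values in front of the partition data; the
     * last partition takes whatever remains of the buffer. */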
    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
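    /* Per-pixel brightness/contrast adjustment:
     * dst = clip(y + y * beta / 256 + alpha). */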
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
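    /* The "show_frame" bit: invisible frames update the reference buffers
     * but are not meant to be displayed. */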
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
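        /* Keyframes begin with the fixed 3-byte start code
         * 0x9d 0x01 0x2a, read here as the little-endian value 0x2a019d. */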
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

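        /* Large magnitude: bits 0-2 are coded low-to-high, then the top bits
         * high-to-low; bit 3 is implied to be 1 unless a higher bit was set,
         * since the magnitude must be >= 8 to reach this branch. */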
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

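        /* The left/above contexts come from neighbouring subblock MVs:
         * subblocks in column 0 take the left MB's rightmost column and
         * subblocks in row 0 take the top MB's bottom row. */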
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

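    /* Score the candidate MVs from up to VP7_MV_PRED_COUNT neighbouring
     * macroblock positions; cnt[] accumulates a weight for the zero,
     * nearest and near candidates. */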
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "lf-delta",
                                       "partial-golden-update",
                                       "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
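    /* Work on a local copy of the range coder so the compiler can keep the
     * hot state in registers; it is written back once the block finishes. */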
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc  += pred[0];
        ret  = 1;
    }

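    /* True if either value is zero or their signs differ (the arithmetic
     * shift isolates the sign bit); the bitwise ORs are intentional to keep
     * the test branchless. In that case the run of equal DCs is reset. */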
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* For the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock. */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
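                /* When the predictor needs edge pixels that don't exist,
                 * predict into a small stack buffer seeded with the proper
                 * edge values, then copy the 4x4 result back. */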
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;

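        /* Luma MVs are in quarter-pel units; scale them to the eighth-pel
         * phase used to index the subpel filter tables (only even phases
         * occur for luma). */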
1718 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1719 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1720
1721 x_off += mv->x >> 2;
1722 y_off += mv->y >> 2;
1723
1724 // edge emulation
1725 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1726 src += y_off * linesize + x_off;
1727 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1728 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1729 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1730 src - my_idx * linesize - mx_idx,
1731 EDGE_EMU_LINESIZE, linesize,
1732 block_w + subpel_idx[1][mx],
1733 block_h + subpel_idx[1][my],
1734 x_off - mx_idx, y_off - my_idx,
1735 width, height);
1736 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1737 src_linesize = EDGE_EMU_LINESIZE;
1738 }
1739 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1740 } else {
1741 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1742 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1743 linesize, block_h, 0, 0);
1744 }
1745 }
1746
1747 /**
1748 * chroma MC function
1749 *
1750 * @param s VP8 decoding context
1751 * @param dst1 target buffer for block data at block position (U plane)
1752 * @param dst2 target buffer for block data at block position (V plane)
1753 * @param ref reference picture buffer at origin (0, 0)
1754 * @param mv motion vector (relative to block position) to get pixel data from
1755 * @param x_off horizontal position of block from origin (0, 0)
1756 * @param y_off vertical position of block from origin (0, 0)
1757 * @param block_w width of block (16, 8 or 4)
1758 * @param block_h height of block (always same as block_w)
1759 * @param width width of src/dst plane data
1760 * @param height height of src/dst plane data
1761 * @param linesize size of a single line of plane data, including padding
1762 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1763 */
1764 static av_always_inline
1765 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1766 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1767 int x_off, int y_off, int block_w, int block_h,
1768 int width, int height, ptrdiff_t linesize,
1769 vp8_mc_func mc_func[3][3])
1770 {
1771 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1772
1773 if (AV_RN32A(mv)) {
1774 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1775 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1776
1777 x_off += mv->x >> 3;
1778 y_off += mv->y >> 3;
1779
1780 // edge emulation
1781 src1 += y_off * linesize + x_off;
1782 src2 += y_off * linesize + x_off;
1783 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1784 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1785 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1786 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1787 src1 - my_idx * linesize - mx_idx,
1788 EDGE_EMU_LINESIZE, linesize,
1789 block_w + subpel_idx[1][mx],
1790 block_h + subpel_idx[1][my],
1791 x_off - mx_idx, y_off - my_idx, width, height);
1792 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1793 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1794
1795 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1796 src2 - my_idx * linesize - mx_idx,
1797 EDGE_EMU_LINESIZE, linesize,
1798 block_w + subpel_idx[1][mx],
1799 block_h + subpel_idx[1][my],
1800 x_off - mx_idx, y_off - my_idx, width, height);
1801 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1802 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1803 } else {
1804 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1805 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1806 }
1807 } else {
1808 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1809 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1810 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1811 }
1812 }
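/* A hypothetical helper restating the arithmetic above: the same MV that is
 * quarter-pel in luma becomes eighth-pel on the half-resolution chroma
 * planes, so the fraction is mv & 7 and the integer offset mv >> 3. E.g.
 * mv->x = 13 (3.25 luma px) is 1.625 chroma px: full-pel part 1, fraction
 * 5/8. The progress wait uses >> 3 because a macroblock spans 8 chroma rows. */
static av_unused void vp8_split_chroma_mv(int mv_comp, int *fullpel, int *frac)
{
    *fullpel = mv_comp >> 3;
    *frac    = mv_comp & 7;
}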
1813
1814 static av_always_inline
1815 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1816 ThreadFrame *ref_frame, int x_off, int y_off,
1817 int bx_off, int by_off, int block_w, int block_h,
1818 int width, int height, VP56mv *mv)
1819 {
1820 VP56mv uvmv = *mv;
1821
1822 /* Y */
1823 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1824 ref_frame, mv, x_off + bx_off, y_off + by_off,
1825 block_w, block_h, width, height, s->linesize,
1826 s->put_pixels_tab[block_w == 8]);
1827
1828 /* U/V */
1829 if (s->profile == 3) {
1830 /* this block only applies to VP8; it is safe to check
1831 * the profile alone, as the VP7 profile is always <= 1 */
1832 uvmv.x &= ~7;
1833 uvmv.y &= ~7;
1834 }
1835 x_off >>= 1;
1836 y_off >>= 1;
1837 bx_off >>= 1;
1838 by_off >>= 1;
1839 width >>= 1;
1840 height >>= 1;
1841 block_w >>= 1;
1842 block_h >>= 1;
1843 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1844 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1845 &uvmv, x_off + bx_off, y_off + by_off,
1846 block_w, block_h, width, height, s->uvlinesize,
1847 s->put_pixels_tab[1 + (block_w == 4)]);
1848 }
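/* Sketch of the profile-3 full-pel chroma clamp above: masking off the three
 * fractional bits snaps an eighth-pel chroma MV component to a whole pixel
 * (rounding toward negative infinity). E.g. 13 (1 + 5/8 px) -> 8 (1 px). */
static av_unused int vp8_fullpel_chroma_mv(int mv_comp)
{
    return mv_comp & ~7;
}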
1849
1850 /* Prefetch the pixels referenced by the estimated mv, 4 macroblocks ahead.
1851 * Optimized for 64-byte cache lines. Inspired by ffh264's prefetch_motion. */
1852 static av_always_inline
1853 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1854 int mb_xy, int ref)
1855 {
1856 /* Don't prefetch refs that haven't been used very often this frame. */
1857 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1858 int x_off = mb_x << 4, y_off = mb_y << 4;
1859 int mx = (mb->mv.x >> 2) + x_off + 8;
1860 int my = (mb->mv.y >> 2) + y_off;
1861 uint8_t **src = s->framep[ref]->tf.f->data;
1862 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1863 /* For threading, a ff_thread_await_progress here might be useful, but
1864 * it actually slows down the decoder. Since a bad prefetch doesn't
1865 * generate bad decoder output, we don't run it here. */
1866 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1867 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1868 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1869 }
1870 }
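/* The gating heuristic above, as a standalone sketch: a reference frame is
 * prefetched only if it has been selected more than once per 32 macroblocks
 * so far in this frame, e.g. at mb_xy = 320 it needs ref_count > 10. */
static av_unused int vp8_should_prefetch_ref(int ref_count, int mb_xy)
{
    return ref_count > (mb_xy >> 5);
}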
1871
1872 /**
1873 * Apply motion vectors to the prediction buffer (spec chapter 18).
1874 */
1875 static av_always_inline
1876 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1877 VP8Macroblock *mb, int mb_x, int mb_y)
1878 {
1879 int x_off = mb_x << 4, y_off = mb_y << 4;
1880 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1881 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1882 VP56mv *bmv = mb->bmv;
1883
1884 switch (mb->partitioning) {
1885 case VP8_SPLITMVMODE_NONE:
1886 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1887 0, 0, 16, 16, width, height, &mb->mv);
1888 break;
1889 case VP8_SPLITMVMODE_4x4: {
1890 int x, y;
1891 VP56mv uvmv;
1892
1893 /* Y */
1894 for (y = 0; y < 4; y++) {
1895 for (x = 0; x < 4; x++) {
1896 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1897 ref, &bmv[4 * y + x],
1898 4 * x + x_off, 4 * y + y_off, 4, 4,
1899 width, height, s->linesize,
1900 s->put_pixels_tab[2]);
1901 }
1902 }
1903
1904 /* U/V */
1905 x_off >>= 1;
1906 y_off >>= 1;
1907 width >>= 1;
1908 height >>= 1;
1909 for (y = 0; y < 2; y++) {
1910 for (x = 0; x < 2; x++) {
1911 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1912 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1913 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1914 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1915 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1916 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1917 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1918 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1919 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1920 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1921 if (s->profile == 3) {
1922 uvmv.x &= ~7;
1923 uvmv.y &= ~7;
1924 }
1925 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1926 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1927 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1928 width, height, s->uvlinesize,
1929 s->put_pixels_tab[2]);
1930 }
1931 }
1932 break;
1933 }
1934 case VP8_SPLITMVMODE_16x8:
1935 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1936 0, 0, 16, 8, width, height, &bmv[0]);
1937 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1938 0, 8, 16, 8, width, height, &bmv[1]);
1939 break;
1940 case VP8_SPLITMVMODE_8x16:
1941 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1942 0, 0, 8, 16, width, height, &bmv[0]);
1943 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1944 8, 0, 8, 16, width, height, &bmv[1]);
1945 break;
1946 case VP8_SPLITMVMODE_8x8:
1947 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1948 0, 0, 8, 8, width, height, &bmv[0]);
1949 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1950 8, 0, 8, 8, width, height, &bmv[1]);
1951 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1952 0, 8, 8, 8, width, height, &bmv[2]);
1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954 8, 8, 8, 8, width, height, &bmv[3]);
1955 break;
1956 }
1957 }
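/* The 4x4-split chroma MV above is the average of four eighth-pel luma MVs,
 * rounded to nearest with halves away from zero: FF_SIGNBIT() evaluates to
 * -1 for negative values, so the bias is +2 for non-negative sums and +1
 * for negative ones. E.g. sum = 6 -> (6 + 2) >> 2 = 2, while sum = -6 ->
 * (-6 + 1) >> 2 = -2. A self-contained sketch of the same rounding: */
static av_unused int vp8_avg4_mv_round(int sum)
{
    /* sum >> 31 plays the role of FF_SIGNBIT() for a 32-bit int */
    return (sum + 2 + (sum >> 31)) >> 2;
}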
1958
1959 static av_always_inline
1960 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1961 {
1962 int x, y, ch;
1963
1964 if (mb->mode != MODE_I4x4) {
1965 uint8_t *y_dst = dst[0];
1966 for (y = 0; y < 4; y++) {
1967 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1968 if (nnz4) {
1969 if (nnz4 & ~0x01010101) {
1970 for (x = 0; x < 4; x++) {
1971 if ((uint8_t) nnz4 == 1)
1972 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1973 td->block[y][x],
1974 s->linesize);
1975 else if ((uint8_t) nnz4 > 1)
1976 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1977 td->block[y][x],
1978 s->linesize);
1979 nnz4 >>= 8;
1980 if (!nnz4)
1981 break;
1982 }
1983 } else {
1984 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1985 }
1986 }
1987 y_dst += 4 * s->linesize;
1988 }
1989 }
1990
1991 for (ch = 0; ch < 2; ch++) {
1992 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1993 if (nnz4) {
1994 uint8_t *ch_dst = dst[1 + ch];
1995 if (nnz4 & ~0x01010101) {
1996 for (y = 0; y < 2; y++) {
1997 for (x = 0; x < 2; x++) {
1998 if ((uint8_t) nnz4 == 1)
1999 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2000 td->block[4 + ch][(y << 1) + x],
2001 s->uvlinesize);
2002 else if ((uint8_t) nnz4 > 1)
2003 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2004 td->block[4 + ch][(y << 1) + x],
2005 s->uvlinesize);
2006 nnz4 >>= 8;
2007 if (!nnz4)
2008 goto chroma_idct_end;
2009 }
2010 ch_dst += 4 * s->uvlinesize;
2011 }
2012 } else {
2013 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2014 }
2015 }
2016 chroma_idct_end:
2017 ;
2018 }
2019 }
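/* Sketch of the nnz4 trick used above: the four per-block non-zero counts of
 * a row are read as one little-endian 32-bit word, one count per byte.
 * Clearing the lowest bit of every byte leaves a non-zero result only if
 * some block has a count > 1, i.e. has AC coefficients; otherwise every
 * block is DC-only and the dc_add4 fast path is valid. */
static av_unused int vp8_row_has_ac(uint32_t nnz4)
{
    return (nnz4 & ~0x01010101U) != 0;
}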
2020
2021 static av_always_inline
2022 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2023 VP8FilterStrength *f, int is_vp7)
2024 {
2025 int interior_limit, filter_level;
2026
2027 if (s->segmentation.enabled) {
2028 filter_level = s->segmentation.filter_level[mb->segment];
2029 if (!s->segmentation.absolute_vals)
2030 filter_level += s->filter.level;
2031 } else
2032 filter_level = s->filter.level;
2033
2034 if (s->lf_delta.enabled) {
2035 filter_level += s->lf_delta.ref[mb->ref_frame];
2036 filter_level += s->lf_delta.mode[mb->mode];
2037 }
2038
2039 filter_level = av_clip_uintp2(filter_level, 6);
2040
2041 interior_limit = filter_level;
2042 if (s->filter.sharpness) {
2043 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2044 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2045 }
2046 interior_limit = FFMAX(interior_limit, 1);
2047
2048 f->filter_level = filter_level;
2049 f->inner_limit = interior_limit;
2050 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2051 mb->mode == VP8_MVMODE_SPLIT;
2052 }
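/* Worked example for filter_level_for_mb() (hypothetical numbers): base
 * level 32, segment filter level +8, ref/mode deltas +2 give 42, kept in
 * [0, 63] by av_clip_uintp2(., 6). With sharpness 4 the interior limit is
 * 42 >> ((4 + 3) >> 2) = 21, then FFMIN(21, 9 - 4) = 5, with a floor of 1. */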
2053
2054 static av_always_inline
2055 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2056 int mb_x, int mb_y, int is_vp7)
2057 {
2058 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2059 int filter_level = f->filter_level;
2060 int inner_limit = f->inner_limit;
2061 int inner_filter = f->inner_filter;
2062 int linesize = s->linesize;
2063 int uvlinesize = s->uvlinesize;
2064 static const uint8_t hev_thresh_lut[2][64] = {
2065 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2066 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2067 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2068 3, 3, 3, 3 },
2069 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2071 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2072 2, 2, 2, 2 }
2073 };
2074
2075 if (!filter_level)
2076 return;
2077
2078 if (is_vp7) {
2079 bedge_lim_y = filter_level;
2080 bedge_lim_uv = filter_level * 2;
2081 mbedge_lim = filter_level + 2;
2082 } else {
2083 bedge_lim_y =
2084 bedge_lim_uv = filter_level * 2 + inner_limit;
2085 mbedge_lim = bedge_lim_y + 4;
2086 }
2087
2088 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2089
2090 if (mb_x) {
2091 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2092 mbedge_lim, inner_limit, hev_thresh);
2093 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2094 mbedge_lim, inner_limit, hev_thresh);
2095 }
2096
2097 #define H_LOOP_FILTER_16Y_INNER(cond) \
2098 if (cond && inner_filter) { \
2099 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2100 bedge_lim_y, inner_limit, \
2101 hev_thresh); \
2102 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2103 bedge_lim_y, inner_limit, \
2104 hev_thresh); \
2105 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2106 bedge_lim_y, inner_limit, \
2107 hev_thresh); \
2108 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2109 uvlinesize, bedge_lim_uv, \
2110 inner_limit, hev_thresh); \
2111 }
2112
2113 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2114
2115 if (mb_y) {
2116 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2117 mbedge_lim, inner_limit, hev_thresh);
2118 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2119 mbedge_lim, inner_limit, hev_thresh);
2120 }
2121
2122 if (inner_filter) {
2123 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2124 linesize, bedge_lim_y,
2125 inner_limit, hev_thresh);
2126 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2127 linesize, bedge_lim_y,
2128 inner_limit, hev_thresh);
2129 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2130 linesize, bedge_lim_y,
2131 inner_limit, hev_thresh);
2132 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2133 dst[2] + 4 * uvlinesize,
2134 uvlinesize, bedge_lim_uv,
2135 inner_limit, hev_thresh);
2136 }
2137
2138 H_LOOP_FILTER_16Y_INNER(is_vp7)
2139 }
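/* Worked example for filter_mb() (hypothetical numbers): on a VP8 inter
 * frame with filter_level = 20 and inner_limit = 10, the block edge limits
 * are 2 * 20 + 10 = 50 and the macroblock edge limit 54; hev_thresh comes
 * from the non-keyframe row of the LUT, hev_thresh_lut[0][20] = 2. */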
2140
2141 static av_always_inline
2142 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2143 int mb_x, int mb_y)
2144 {
2145 int mbedge_lim, bedge_lim;
2146 int filter_level = f->filter_level;
2147 int inner_limit = f->inner_limit;
2148 int inner_filter = f->inner_filter;
2149 int linesize = s->linesize;
2150
2151 if (!filter_level)
2152 return;
2153
2154 bedge_lim = 2 * filter_level + inner_limit;
2155 mbedge_lim = bedge_lim + 4;
2156
2157 if (mb_x)
2158 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2159 if (inner_filter) {
2160 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2161 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2162 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2163 }
2164
2165 if (mb_y)
2166 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2167 if (inner_filter) {
2168 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2169 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2170 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2171 }
2172 }
2173
2174 #define MARGIN (16 << 2)
2175 static av_always_inline
2176 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2177 VP8Frame *prev_frame, int is_vp7)
2178 {
2179 VP8Context *s = avctx->priv_data;
2180 int mb_x, mb_y;
2181
2182 s->mv_min.y = -MARGIN;
2183 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2184 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2185 VP8Macroblock *mb = s->macroblocks_base +
2186 ((s->mb_width + 1) * (mb_y + 1) + 1);
2187 int mb_xy = mb_y * s->mb_width;
2188
2189 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2190
2191 s->mv_min.x = -MARGIN;
2192 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2193 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2194 if (mb_y == 0)
2195 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2196 DC_PRED * 0x01010101);
2197 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2198 prev_frame && prev_frame->seg_map ?
2199 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2200 s->mv_min.x -= 64;
2201 s->mv_max.x -= 64;
2202 }
2203 s->mv_min.y -= 64;
2204 s->mv_max.y -= 64;
2205 }
2206 }
2207
2208 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2209 VP8Frame *prev_frame)
2210 {
2211 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2212 }
2213
2214 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2215 VP8Frame *prev_frame)
2216 {
2217 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2218 }
2219
2220 #if HAVE_THREADS
2221 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2222 do { \
2223 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2224 if (otd->thread_mb_pos < tmp) { \
2225 pthread_mutex_lock(&otd->lock); \
2226 td->wait_mb_pos = tmp; \
2227 do { \
2228 if (otd->thread_mb_pos >= tmp) \
2229 break; \
2230 pthread_cond_wait(&otd->cond, &otd->lock); \
2231 } while (1); \
2232 td->wait_mb_pos = INT_MAX; \
2233 pthread_mutex_unlock(&otd->lock); \
2234 } \
2235 } while (0)
2236
2237 #define update_pos(td, mb_y, mb_x) \
2238 do { \
2239 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2240 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2241 (num_jobs > 1); \
2242 int is_null = !next_td || !prev_td; \
2243 int pos_check = (is_null) ? 1 \
2244 : (next_td != td && \
2245 pos >= next_td->wait_mb_pos) || \
2246 (prev_td != td && \
2247 pos >= prev_td->wait_mb_pos); \
2248 td->thread_mb_pos = pos; \
2249 if (sliced_threading && pos_check) { \
2250 pthread_mutex_lock(&td->lock); \
2251 pthread_cond_broadcast(&td->cond); \
2252 pthread_mutex_unlock(&td->lock); \
2253 } \
2254 } while (0)
2255 #else
2256 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2257 #define update_pos(td, mb_y, mb_x)
2258 #endif
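/* Sketch of the progress encoding used by the macros above: each thread's
 * position is a single int with the macroblock row in the high 16 bits and
 * the column in the low 16, so "row A is behind row B" reduces to one
 * integer comparison. */
static av_unused int vp8_pack_mb_pos(int mb_y, int mb_x)
{
    return (mb_y << 16) | (mb_x & 0xFFFF);
}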
2259
2260 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2261 int jobnr, int threadnr, int is_vp7)
2262 {
2263 VP8Context *s = avctx->priv_data;
2264 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2265 int mb_y = td->thread_mb_pos >> 16;
2266 int mb_x, mb_xy = mb_y * s->mb_width;
2267 int num_jobs = s->num_jobs;
2268 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2269 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2270 VP8Macroblock *mb;
2271 uint8_t *dst[3] = {
2272 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2273 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2274 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2275 };
2276 if (mb_y == 0)
2277 prev_td = td;
2278 else
2279 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2280 if (mb_y == s->mb_height - 1)
2281 next_td = td;
2282 else
2283 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2284 if (s->mb_layout == 1)
2285 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2286 else {
2287 // Make sure the previous frame has read its segmentation map,
2288 // if we re-use the same map.
2289 if (prev_frame && s->segmentation.enabled &&
2290 !s->segmentation.update_map)
2291 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2292 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2293 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2294 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2295 }
2296
2297 if (!is_vp7 || mb_y == 0)
2298 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2299
2300 s->mv_min.x = -MARGIN;
2301 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2302
2303 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2304 // Wait for the previous thread to reach mb_x+2/mb_y-2 (VP7) or mb_x+1/mb_y-1 (VP8).
2305 if (prev_td != td) {
2306 if (threadnr != 0) {
2307 check_thread_pos(td, prev_td,
2308 mb_x + (is_vp7 ? 2 : 1),
2309 mb_y - (is_vp7 ? 2 : 1));
2310 } else {
2311 check_thread_pos(td, prev_td,
2312 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2313 mb_y - (is_vp7 ? 2 : 1));
2314 }
2315 }
2316
2317 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2318 s->linesize, 4);
2319 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2320 dst[2] - dst[1], 2);
2321
2322 if (!s->mb_layout)
2323 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2324 prev_frame && prev_frame->seg_map ?
2325 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2326
2327 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2328
2329 if (!mb->skip)
2330 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2331
2332 if (mb->mode <= MODE_I4x4)
2333 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2334 else
2335 inter_predict(s, td, dst, mb, mb_x, mb_y);
2336
2337 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2338
2339 if (!mb->skip) {
2340 idct_mb(s, td, dst, mb);
2341 } else {
2342 AV_ZERO64(td->left_nnz);
2343 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2344
2345 /* Reset the DC block predictors that would exist
2346 * if this mb had coefficients */
2347 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2348 td->left_nnz[8] = 0;
2349 s->top_nnz[mb_x][8] = 0;
2350 }
2351 }
2352
2353 if (s->deblock_filter)
2354 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2355
2356 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2357 if (s->filter.simple)
2358 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2359 NULL, NULL, s->linesize, 0, 1);
2360 else
2361 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2362 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2363 }
2364
2365 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2366
2367 dst[0] += 16;
2368 dst[1] += 8;
2369 dst[2] += 8;
2370 s->mv_min.x -= 64;
2371 s->mv_max.x -= 64;
2372
2373 update_pos(td, mb_y, mb_x);
2378 }
2379 }
2380
2381 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2382 int jobnr, int threadnr)
2383 {
2384 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2385 }
2386
2387 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2388 int jobnr, int threadnr)
2389 {
2390 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2391 }
2392
2393 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2394 int jobnr, int threadnr, int is_vp7)
2395 {
2396 VP8Context *s = avctx->priv_data;
2397 VP8ThreadData *td = &s->thread_data[threadnr];
2398 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2399 AVFrame *curframe = s->curframe->tf.f;
2400 VP8Macroblock *mb;
2401 VP8ThreadData *prev_td, *next_td;
2402 uint8_t *dst[3] = {
2403 curframe->data[0] + 16 * mb_y * s->linesize,
2404 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2405 curframe->data[2] + 8 * mb_y * s->uvlinesize
2406 };
2407
2408 if (s->mb_layout == 1)
2409 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2410 else
2411 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2412
2413 if (mb_y == 0)
2414 prev_td = td;
2415 else
2416 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2417 if (mb_y == s->mb_height - 1)
2418 next_td = td;
2419 else
2420 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2421
2422 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2423 VP8FilterStrength *f = &td->filter_strength[mb_x];
2424 if (prev_td != td)
2425 check_thread_pos(td, prev_td,
2426 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2427 if (next_td != td && next_td != &s->thread_data[0])
2428 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2430
2431 if (num_jobs == 1) {
2432 if (s->filter.simple)
2433 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2434 NULL, NULL, s->linesize, 0, 1);
2435 else
2436 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2437 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2438 }
2439
2440 if (s->filter.simple)
2441 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2442 else
2443 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2444 dst[0] += 16;
2445 dst[1] += 8;
2446 dst[2] += 8;
2447
2448 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2449 }
2450 }
2451
2452 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2453 int jobnr, int threadnr)
2454 {
2455 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2456 }
2457
2458 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2459 int jobnr, int threadnr)
2460 {
2461 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2462 }
2463
2464 static av_always_inline
2465 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2466 int threadnr, int is_vp7)
2467 {
2468 VP8Context *s = avctx->priv_data;
2469 VP8ThreadData *td = &s->thread_data[jobnr];
2470 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2471 VP8Frame *curframe = s->curframe;
2472 int mb_y, num_jobs = s->num_jobs;
2473
2474 td->thread_nr = threadnr;
2475 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2478 td->thread_mb_pos = mb_y << 16;
2479 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2480 if (s->deblock_filter)
2481 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2482 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2483
2484 s->mv_min.y -= 64;
2485 s->mv_max.y -= 64;
2486
2487 if (avctx->active_thread_type == FF_THREAD_FRAME)
2488 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2489 }
2490
2491 return 0;
2492 }
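/* Row scheduling used above, by way of illustration: with num_jobs = 3,
 * job 0 decodes rows 0, 3, 6, ..., job 1 rows 1, 4, 7, ... and job 2 rows
 * 2, 5, 8, ...; check_thread_pos()/update_pos() keep each row trailing the
 * one above it by the one- or two-macroblock lag the codec requires. */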
2493
2494 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2495 int jobnr, int threadnr)
2496 {
2497 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2498 }
2499
2500 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2501 int jobnr, int threadnr)
2502 {
2503 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2504 }
2505
2506
2507 static av_always_inline
2508 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2509 AVPacket *avpkt, int is_vp7)
2510 {
2511 VP8Context *s = avctx->priv_data;
2512 int ret, i, referenced, num_jobs;
2513 enum AVDiscard skip_thresh;
2514 VP8Frame *av_uninit(curframe), *prev_frame;
2515
2516 if (is_vp7)
2517 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2518 else
2519 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2520
2521 if (ret < 0)
2522 goto err;
2523
2524 prev_frame = s->framep[VP56_FRAME_CURRENT];
2525
2526 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2527 s->update_altref == VP56_FRAME_CURRENT;
2528
2529 skip_thresh = !referenced ? AVDISCARD_NONREF
2530 : !s->keyframe ? AVDISCARD_NONKEY
2531 : AVDISCARD_ALL;
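    /* In other words: an unreferenced frame may be dropped at skip_frame >=
     * AVDISCARD_NONREF, a referenced inter frame at >= AVDISCARD_NONKEY,
     * and a keyframe only at AVDISCARD_ALL. */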
2532
2533 if (avctx->skip_frame >= skip_thresh) {
2534 s->invisible = 1;
2535 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2536 goto skip_decode;
2537 }
2538 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2539
2540 // release no longer referenced frames
2541 for (i = 0; i < 5; i++)
2542 if (s->frames[i].tf.f->data[0] &&
2543 &s->frames[i] != prev_frame &&
2544 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2545 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2546 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2547 vp8_release_frame(s, &s->frames[i]);
2548
2549 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2550
2551 if (!s->colorspace)
2552 avctx->colorspace = AVCOL_SPC_BT470BG;
2553 if (s->fullrange)
2554 avctx->color_range = AVCOL_RANGE_JPEG;
2555 else
2556 avctx->color_range = AVCOL_RANGE_MPEG;
2557
2558 /* Given that arithmetic probabilities are updated every frame, it's quite
2559 * likely that the values we have on a random interframe are complete
2560 * junk if we didn't start decoding on a keyframe. So just don't display
2561 * anything rather than junk. */
2562 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2563 !s->framep[VP56_FRAME_GOLDEN] ||
2564 !s->framep[VP56_FRAME_GOLDEN2])) {
2565 av_log(avctx, AV_LOG_WARNING,
2566 "Discarding interframe without a prior keyframe!\n");
2567 ret = AVERROR_INVALIDDATA;
2568 goto err;
2569 }
2570
2571 curframe->tf.f->key_frame = s->keyframe;
2572 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2573 : AV_PICTURE_TYPE_P;
2574 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2575 goto err;
2576
2577 // check if golden and altref are swapped
2578 if (s->update_altref != VP56_FRAME_NONE)
2579 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2580 else
2581 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2582
2583 if (s->update_golden != VP56_FRAME_NONE)
2584 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2585 else
2586 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2587
2588 if (s->update_last)
2589 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2590 else
2591 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2592
2593 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2594
2595 if (avctx->codec->update_thread_context)
2596 ff_thread_finish_setup(avctx);
2597
2598 s->linesize = curframe->tf.f->linesize[0];
2599 s->uvlinesize = curframe->tf.f->linesize[1];
2600
2601 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2602 /* Zero macroblock structures for top/top-left prediction
2603 * from outside the frame. */
2604 if (!s->mb_layout)
2605 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2606 (s->mb_width + 1) * sizeof(*s->macroblocks));
2607 if (!s->mb_layout && s->keyframe)
2608 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2609
2610 memset(s->ref_count, 0, sizeof(s->ref_count));
2611
2612 if (s->mb_layout == 1) {
2613 // Make sure the previous frame has read its segmentation map,
2614 // if we re-use the same map.
2615 if (prev_frame && s->segmentation.enabled &&
2616 !s->segmentation.update_map)
2617 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2618 if (is_vp7)
2619 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2620 else
2621 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2622 }
2623
2624 if (avctx->active_thread_type == FF_THREAD_FRAME)
2625 num_jobs = 1;
2626 else
2627 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2628 s->num_jobs = num_jobs;
2629 s->curframe = curframe;
2630 s->prev_frame = prev_frame;
2631 s->mv_min.y = -MARGIN;
2632 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2633 for (i = 0; i < MAX_THREADS; i++) {
2634 s->thread_data[i].thread_mb_pos = 0;
2635 s->thread_data[i].wait_mb_pos = INT_MAX;
2636 }
2637 if (is_vp7)
2638 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2639 num_jobs);
2640 else
2641 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2642 num_jobs);
2643
2644 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2645 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2646
2647 skip_decode:
2648 // if future frames don't use the updated probabilities,
2649 // reset them to the values we saved
2650 if (!s->update_probabilities)
2651 s->prob[0] = s->prob[1];
2652
2653 if (!s->invisible) {
2654 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2655 return ret;
2656 *got_frame = 1;
2657 }
2658
2659 return avpkt->size;
2660 err:
2661 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2662 return ret;
2663 }
2664
2665 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2666 AVPacket *avpkt)
2667 {
2668 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2669 }
2670
2671 #if CONFIG_VP7_DECODER
2672 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2673 AVPacket *avpkt)
2674 {
2675 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2676 }
2677 #endif /* CONFIG_VP7_DECODER */
2678
2679 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2680 {
2681 VP8Context *s = avctx->priv_data;
2682 int i;
2683
2684 vp8_decode_flush_impl(avctx, 1);
2685 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2686 av_frame_free(&s->frames[i].tf.f);
2687
2688 return 0;
2689 }
2690
2691 static av_cold int vp8_init_frames(VP8Context *s)
2692 {
2693 int i;
2694 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2695 s->frames[i].tf.f = av_frame_alloc();
2696 if (!s->frames[i].tf.f)
2697 return AVERROR(ENOMEM);
2698 }
2699 return 0;
2700 }
2701
2702 static av_always_inline
2703 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2704 {
2705 VP8Context *s = avctx->priv_data;
2706 int ret;
2707
2708 s->avctx = avctx;
2709 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2710 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2711 avctx->internal->allocate_progress = 1;
2712
2713 ff_videodsp_init(&s->vdsp, 8);
2714
2715 ff_vp78dsp_init(&s->vp8dsp);
2716 if (CONFIG_VP7_DECODER && is_vp7) {
2717 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2718 ff_vp7dsp_init(&s->vp8dsp);
2719 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2720 s->filter_mb_row = vp7_filter_mb_row;
2721 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2722 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2723 ff_vp8dsp_init(&s->vp8dsp);
2724 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2725 s->filter_mb_row = vp8_filter_mb_row;
2726 }
2727
2728 /* the coefficient scan order never changes in VP8 (VP7 can update it per frame) */
2729 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2730
2731 if ((ret = vp8_init_frames(s)) < 0) {
2732 ff_vp8_decode_free(avctx);
2733 return ret;
2734 }
2735
2736 return 0;
2737 }
2738
2739 #if CONFIG_VP7_DECODER
2740 static int vp7_decode_init(AVCodecContext *avctx)
2741 {
2742 return vp78_decode_init(avctx, IS_VP7);
2743 }
2744 #endif /* CONFIG_VP7_DECODER */
2745
2746 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2747 {
2748 return vp78_decode_init(avctx, IS_VP8);
2749 }
2750
2751 #if CONFIG_VP8_DECODER
2752 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2753 {
2754 VP8Context *s = avctx->priv_data;
2755 int ret;
2756
2757 s->avctx = avctx;
2758
2759 if ((ret = vp8_init_frames(s)) < 0) {
2760 ff_vp8_decode_free(avctx);
2761 return ret;
2762 }
2763
2764 return 0;
2765 }
2766
2767 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
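/* REBASE maps a frame pointer from the source thread's context into the
 * corresponding slot of this context: both keep their VP8Frame objects in
 * parallel frames[] arrays, so the array index is what survives the copy.
 * The same idea as a standalone sketch: */
static av_unused VP8Frame *vp8_rebase_frame(VP8Frame *pic,
                                            VP8Frame *src_base,
                                            VP8Frame *dst_base)
{
    return pic ? dst_base + (pic - src_base) : NULL;
}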
2768
2769 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2770 const AVCodecContext *src)
2771 {
2772 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2773 int i;
2774
2775 if (s->macroblocks_base &&
2776 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2777 free_buffers(s);
2778 s->mb_width = s_src->mb_width;
2779 s->mb_height = s_src->mb_height;
2780 }
2781
2782 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2783 s->segmentation = s_src->segmentation;
2784 s->lf_delta = s_src->lf_delta;
2785 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2786
2787 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2788 if (s_src->frames[i].tf.f->data[0]) {
2789 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2790 if (ret < 0)
2791 return ret;
2792 }
2793 }
2794
2795 s->framep[0] = REBASE(s_src->next_framep[0]);
2796 s->framep[1] = REBASE(s_src->next_framep[1]);
2797 s->framep[2] = REBASE(s_src->next_framep[2]);
2798 s->framep[3] = REBASE(s_src->next_framep[3]);
2799
2800 return 0;
2801 }
2802 #endif /* CONFIG_VP8_DECODER */
2803
2804 #if CONFIG_VP7_DECODER
2805 AVCodec ff_vp7_decoder = {
2806 .name = "vp7",
2807 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2808 .type = AVMEDIA_TYPE_VIDEO,
2809 .id = AV_CODEC_ID_VP7,
2810 .priv_data_size = sizeof(VP8Context),
2811 .init = vp7_decode_init,
2812 .close = ff_vp8_decode_free,
2813 .decode = vp7_decode_frame,
2814 .capabilities = CODEC_CAP_DR1,
2815 .flush = vp8_decode_flush,
2816 };
2817 #endif /* CONFIG_VP7_DECODER */
2818
2819 #if CONFIG_VP8_DECODER
2820 AVCodec ff_vp8_decoder = {
2821 .name = "vp8",
2822 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2823 .type = AVMEDIA_TYPE_VIDEO,
2824 .id = AV_CODEC_ID_VP8,
2825 .priv_data_size = sizeof(VP8Context),
2826 .init = ff_vp8_decode_init,
2827 .close = ff_vp8_decode_free,
2828 .decode = ff_vp8_decode_frame,
2829 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2830 .flush = vp8_decode_flush,
2831 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2832 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2833 };
2834 #endif /* CONFIG_VP8_DECODER */