/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dsp.h"
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342

enum CompPredMode {
    PRED_SINGLEREF,
    PRED_COMPREF,
    PRED_SWITCHABLE,
};

enum BlockLevel {
    BL_64X64,
    BL_32X32,
    BL_16X16,
    BL_8X8,
};

enum BlockSize {
    BS_64x64,
    BS_64x32,
    BS_32x64,
    BS_32x32,
    BS_32x16,
    BS_16x32,
    BS_16x16,
    BS_16x8,
    BS_8x16,
    BS_8x8,
    BS_8x4,
    BS_4x8,
    BS_4x4,
    N_BS_SIZES,
};

struct VP9mvrefPair {
    VP56mv mv[2];
    int8_t ref[2];
};

typedef struct VP9Frame {
    ThreadFrame tf;
    AVBufferRef *extradata;
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv;
} VP9Frame;

struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};

typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;

typedef struct VP9Context {
    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block *b_base, *b;
    int pass, uses_2pass, last_uses_2pass;
    int row, row7, col, col7;
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;

    // bitstream header
    uint8_t profile;
    uint8_t keyframe, last_keyframe;
    uint8_t invisible;
    uint8_t use_last_frame_mvs;
    uint8_t errorres;
    uint8_t colorspace;
    uint8_t fullrange;
    uint8_t intraonly;
    uint8_t resetctx;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t fixcompref;
    uint8_t refreshctx;
    uint8_t parallelmode;
    uint8_t framectxid;
    uint8_t refidx[3];
    uint8_t signbias[3];
    uint8_t varcompref[2];
    ThreadFrame refs[8], next_refs[8];
#define CUR_FRAME 0
#define LAST_FRAME 1
    VP9Frame frames[2];

    struct {
        uint8_t level;
        int8_t sharpness;
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter;
    struct {
        uint8_t enabled;
        int8_t mode[2];
        int8_t ref[4];
    } lf_delta;
    uint8_t yac_qi;
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t lossless;
#define MAX_SEGMENT 8
    struct {
        uint8_t enabled;
        uint8_t temporal;
        uint8_t absolute_vals;
        uint8_t update_map;
        struct {
            uint8_t q_enabled;
            uint8_t lf_enabled;
            uint8_t ref_enabled;
            uint8_t skip_enabled;
            uint8_t ref_val;
            int16_t q_val;
            int8_t lf_val;
            int16_t qmul[2][2];
            uint8_t lflvl[4][2];
        } feat[MAX_SEGMENT];
    } segmentation;
    struct {
        unsigned log2_tile_cols, log2_tile_rows;
        unsigned tile_cols, tile_rows;
        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    } tiling;
    unsigned sb_cols, sb_rows, rows, cols;
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
        uint8_t seg[7];
        uint8_t segpred[3];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
} VP9Context;

static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
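
/* Illustration (derived from the table above): bwh_tab[0][bs] gives a
 * block's width/height in 4x4-pixel units, bwh_tab[1][bs] in 8x8-pixel
 * units. E.g. BS_64x32 is { 16, 8 } in 4x4 units and { 8, 4 } in 8x8
 * units; the sub-8x8 sizes clamp to { 1, 1 } in the second table since
 * they still occupy one full 8x8 context cell. */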

static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    // retain segmentation map if it doesn't update
    if (s->segmentation.enabled && !s->segmentation.update_map &&
        !s->intraonly && !s->keyframe && !s->errorres) {
        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
    }

    return 0;
}
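
/* Layout note (as implied by the code above): f->extradata packs both
 * per-frame side-data arrays into one buffer: sz bytes of segmentation map
 * (one byte per 8x8 block, 64 blocks per 64x64 superblock), followed by
 * sz struct VP9mvrefPair entries used as the MV/reference cache by
 * find_ref_mvs() below. */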

static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
}

static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);
        return AVERROR(ENOMEM);
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;

    return 0;
}

static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    av_assert0(w > 0 && h > 0);

    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
        return 0;

    ctx->width = w;
    ctx->height = h;
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    return 0;
}

static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    if (s->uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
        s->uveob_base[0] = s->eob_base + 256 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
        s->uveob_base[0] = s->eob_base + 256;
        s->uveob_base[1] = s->uveob_base[0] + 64;
    }
    s->block_alloc_using_2pass = s->uses_2pass;

    return 0;
}
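
/* Size check for the allocations above: per 64x64 superblock the
 * coefficient buffer holds 64*64 luma + 2 * 32*32 chroma int16_t
 * coefficients (12288 bytes), plus 256 + 2 * 64 = 384 bytes of EOB flags,
 * i.e. exactly the (64 * 64 + 128) * 3 bytes passed to av_mallocz(). */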

// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
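
/* Example (illustrative): for n = 4, the bit sequence 0101 followed by a
 * sign bit of 1 decodes as -5 -- magnitude first, sign last, unlike the
 * sign-first two's-complement layout that get_sbits() would assume. */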

static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
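
/* Worked example: inv_recenter_nonneg() undoes the recentering around a
 * reference value m used by the probability-update coder. For m = 10, the
 * inputs v = 0, 1, 2, 3, 4, ... map to 10, 9, 11, 8, 12, ... (alternating
 * around m), while any v > 2 * m is passed through unchanged. */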

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code performs a differential probability update. For a current
     * probability A in the range [1, 255], the difference to any new
     * probability lies in [1 - A, 255 - A]. Part of that (absolute) range
     * exists on both the positive and the negative side, while the rest
     * exists on one side only. The shared part is coded differentially,
     * i.e. doubled with the lowest bit carrying the sign, and the one-sided
     * part is coded on top of that. The resulting absolute difference again
     * lies in [0, 254], and a larger value means we are further away from
     * the original value A, so it can be coded as a VLC, since higher values
     * are increasingly unlikely. The first 20 values in inv_map_table[]
     * allow 'cheap, rough' updates vs. the 'fine, exact' updates further
     * down the range, which adds one extra dimension to this differential
     * update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
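
/* Decode trace (illustrative): if the first branch is taken, d is a 4-bit
 * value; say d = 5, so inv_map_table[d] = 72. For a current probability
 * p = 100 (<= 128), the new probability becomes
 * 1 + inv_recenter_nonneg(72, 99) = 1 + (99 + 36) = 136. */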

static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_uses_2pass = s->uses_2pass;
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }
    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
            }
        }
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_ERROR,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            return AVERROR_INVALIDDATA;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // fw update)?
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}

static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;

#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }

#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            clamp_mv(&tmp, &mv, s); \
            m = AV_RN32A(&tmp); \
            if (!idx) { \
                AV_WN32A(pmv, m); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                AV_WN32A(pmv, m); \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }

    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        if (!s->last_uses_2pass)
            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }

#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        // no need to await_progress, because we already did that above
        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
#undef INVALID_MV
#undef RETURN_MV
#undef RETURN_SCALE_MV
}

static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 8 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
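
/* Value layout (for reference, derived from the code above): for class
 * c > 0 the magnitude is composed as (8 << c) + (integer bits << 3) +
 * (fractional bits << 1) + hp bit, in 1/8-pel units. E.g. c = 1 with all
 * remaining bits zero and hp disabled gives n = 16 | 1 = 17, returned as
 * +/-18 once the sign is applied. */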

static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *b = s->b;

    if (mode == ZEROMV) {
        AV_ZERO64(mv);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}

static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}
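
/* Usage sketch: setctx_2d() broadcasts one context value over a w x h
 * region of a per-block map using aligned multi-byte stores as a small
 * memset speedup; e.g. the segmentation-map update in decode_mode() below:
 *   setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
 *             w4, h4, 8 * s->sb_cols, b->seg_id); */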

static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
    int vref, filter_id;

    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        if (!s->errorres) {
            int pred = 8, x;
            uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;

            if (!s->last_uses_2pass)
                ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
            for (y = 0; y < h4; y++)
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if (s->segmentation.enabled &&
        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  w4, h4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }

    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                    vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                        vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };

        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->varcompref[1] &&
                                         refa != s->varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->varcompref[1] &&
                                         refl != s->varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }

        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = vp9_filter_lut[filter_id];
        } else {
            b->filter = s->filtermode;
        }

        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
1869 s->counts.mv_mode[c][b->mode[3] - 10]++;
1870 fill_mv(s, b->mv[3], b->mode[3], 3);
1871 } else {
1872 b->mode[3] = b->mode[2];
1873 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1874 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1875 }
1876 } else {
1877 b->mode[2] = b->mode[0];
1878 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1879 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1880 b->mode[3] = b->mode[1];
1881 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1882 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1883 }
1884 } else {
1885 fill_mv(s, b->mv[0], b->mode[0], -1);
1886 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1887 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1888 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1889 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1890 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1891 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1892 }
1893
1894 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1895 }
1896
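/*
 * SPLAT_CTX stores 'n' copies of a byte-sized context value using the widest
 * aligned write available; the multiplications by 0x0101... replicate the
 * byte across the word. The 64-bit stores are only used with HAVE_FAST_64BIT.
 */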
1897#if HAVE_FAST_64BIT
1898#define SPLAT_CTX(var, val, n) \
1899 switch (n) { \
1900 case 1: var = val; break; \
1901 case 2: AV_WN16A(&var, val * 0x0101); break; \
1902 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1903 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1904 case 16: { \
1905 uint64_t v64 = val * 0x0101010101010101ULL; \
1906 AV_WN64A( &var, v64); \
1907 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1908 break; \
1909 } \
1910 }
1911#else
1912#define SPLAT_CTX(var, val, n) \
1913 switch (n) { \
1914 case 1: var = val; break; \
1915 case 2: AV_WN16A(&var, val * 0x0101); break; \
1916 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1917 case 8: { \
1918 uint32_t v32 = val * 0x01010101; \
1919 AV_WN32A( &var, v32); \
1920 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1921 break; \
1922 } \
1923 case 16: { \
1924 uint32_t v32 = val * 0x01010101; \
1925 AV_WN32A( &var, v32); \
1926 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1927 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1928 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1929 break; \
1930 } \
1931 }
1932#endif
1933
1934 switch (bwh_tab[1][b->bs][0]) {
1935#define SET_CTXS(dir, off, n) \
1936 do { \
1937 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1938 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1939 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1940 if (!s->keyframe && !s->intraonly) { \
1941 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1942 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1943 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1944 if (!b->intra) { \
1945 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1946 if (s->filtermode == FILTER_SWITCHABLE) { \
1947 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1948 } \
1949 } \
1950 } \
1951 } while (0)
1952 case 1: SET_CTXS(above, col, 1); break;
1953 case 2: SET_CTXS(above, col, 2); break;
1954 case 4: SET_CTXS(above, col, 4); break;
1955 case 8: SET_CTXS(above, col, 8); break;
1956 }
1957 switch (bwh_tab[1][b->bs][1]) {
1958 case 1: SET_CTXS(left, row7, 1); break;
1959 case 2: SET_CTXS(left, row7, 2); break;
1960 case 4: SET_CTXS(left, row7, 4); break;
1961 case 8: SET_CTXS(left, row7, 8); break;
1962 }
1963#undef SPLAT_CTX
1964#undef SET_CTXS
1965
1966 if (!s->keyframe && !s->intraonly) {
1967 if (b->bs > BS_8x8) {
1968 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1969
1970 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1971 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1972 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1973 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1974 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1975 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1976 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1977 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1978 } else {
1979 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1980
1981 for (n = 0; n < w4 * 2; n++) {
1982 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1983 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1984 }
1985 for (n = 0; n < h4 * 2; n++) {
1986 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1987 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1988 }
1989 }
1990 }
1991
1992 // FIXME kinda ugly
1993 for (y = 0; y < h4; y++) {
1994 int x, o = (row + y) * s->sb_cols * 8 + col;
1995 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1996
1997 if (b->intra) {
1998 for (x = 0; x < w4; x++) {
1999 mv[x].ref[0] =
2000 mv[x].ref[1] = -1;
2001 }
2002 } else if (b->comp) {
2003 for (x = 0; x < w4; x++) {
2004 mv[x].ref[0] = b->ref[0];
2005 mv[x].ref[1] = b->ref[1];
2006 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2007 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2008 }
2009 } else {
2010 for (x = 0; x < w4; x++) {
2011 mv[x].ref[0] = b->ref[0];
2012 mv[x].ref[1] = -1;
2013 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2014 }
2015 }
2016 }
2017}
2018
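/*
 * Coefficient token decoder, shared by all transform sizes. It walks the
 * scan order one position at a time: an EOB decision first, then
 * zero/one/larger decisions, with larger magnitudes coded through the
 * cat1-cat6 extra-bit trees. 'cache' holds a clamped token value per
 * position so the two already-decoded neighbours from 'nb' can select the
 * probability context of the next position. For 32x32 transforms the
 * dequantized coefficient is halved to match that transform's scaling.
 * Returns the number of decoded coefficients (the end-of-block position).
 */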
2019// FIXME merge cnt/eob arguments?
2020static av_always_inline int
2021decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2022 int is_tx32x32, unsigned (*cnt)[6][3],
2023 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2024 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2025 const int16_t *band_counts, const int16_t *qmul)
2026{
2027 int i = 0, band = 0, band_left = band_counts[band];
2028 uint8_t *tp = p[0][nnz];
2029 uint8_t cache[1024];
2030
2031 do {
2032 int val, rc;
2033
2034 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2035 eob[band][nnz][val]++;
2036 if (!val)
2037 break;
2038
2039 skip_eob:
2040 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2041 cnt[band][nnz][0]++;
2042 if (!--band_left)
2043 band_left = band_counts[++band];
2044 cache[scan[i]] = 0;
2045 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2046 tp = p[band][nnz];
2047 if (++i == n_coeffs)
                break; // invalid input; blocks should end with EOB
2049 goto skip_eob;
2050 }
2051
2052 rc = scan[i];
2053 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2054 cnt[band][nnz][1]++;
2055 val = 1;
2056 cache[rc] = 1;
2057 } else {
2058 // fill in p[3-10] (model fill) - only once per frame for each pos
2059 if (!tp[3])
2060 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2061
2062 cnt[band][nnz][2]++;
2063 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2064 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2065 cache[rc] = val = 2;
2066 } else {
2067 val = 3 + vp56_rac_get_prob(c, tp[5]);
2068 cache[rc] = 3;
2069 }
2070 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2071 cache[rc] = 4;
2072 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2073 val = 5 + vp56_rac_get_prob(c, 159);
2074 } else {
2075 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2076 val += vp56_rac_get_prob(c, 145);
2077 }
2078 } else { // cat 3-6
2079 cache[rc] = 5;
2080 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2081 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2082 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2083 val += (vp56_rac_get_prob(c, 148) << 1);
2084 val += vp56_rac_get_prob(c, 140);
2085 } else {
2086 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2087 val += (vp56_rac_get_prob(c, 155) << 2);
2088 val += (vp56_rac_get_prob(c, 140) << 1);
2089 val += vp56_rac_get_prob(c, 135);
2090 }
2091 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2092 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2093 val += (vp56_rac_get_prob(c, 157) << 3);
2094 val += (vp56_rac_get_prob(c, 141) << 2);
2095 val += (vp56_rac_get_prob(c, 134) << 1);
2096 val += vp56_rac_get_prob(c, 130);
2097 } else {
2098 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2099 val += (vp56_rac_get_prob(c, 254) << 12);
2100 val += (vp56_rac_get_prob(c, 254) << 11);
2101 val += (vp56_rac_get_prob(c, 252) << 10);
2102 val += (vp56_rac_get_prob(c, 249) << 9);
2103 val += (vp56_rac_get_prob(c, 243) << 8);
2104 val += (vp56_rac_get_prob(c, 230) << 7);
2105 val += (vp56_rac_get_prob(c, 196) << 6);
2106 val += (vp56_rac_get_prob(c, 177) << 5);
2107 val += (vp56_rac_get_prob(c, 153) << 4);
2108 val += (vp56_rac_get_prob(c, 140) << 3);
2109 val += (vp56_rac_get_prob(c, 133) << 2);
2110 val += (vp56_rac_get_prob(c, 130) << 1);
2111 val += vp56_rac_get_prob(c, 129);
2112 }
2113 }
2114 }
2115 if (!--band_left)
2116 band_left = band_counts[++band];
2117 if (is_tx32x32)
2118 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2119 else
2120 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2121 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2122 tp = p[band][nnz];
2123 } while (++i < n_coeffs);
2124
2125 return i;
2126}
2127
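/* Thin wrappers so that is_tx32x32 becomes a compile-time constant. */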
2128static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2129 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2130 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2131 const int16_t (*nb)[2], const int16_t *band_counts,
2132 const int16_t *qmul)
2133{
2134 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2135 nnz, scan, nb, band_counts, qmul);
2136}
2137
2138static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2139 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2140 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2141 const int16_t (*nb)[2], const int16_t *band_counts,
2142 const int16_t *qmul)
2143{
2144 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2145 nnz, scan, nb, band_counts, qmul);
2146}
2147
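/*
 * Decode all coefficient tokens of the current block: luma first, then both
 * chroma planes. The above/left non-zero contexts are tracked at 4x4
 * granularity; for 8x8 and larger transforms they are merged before decoding
 * (MERGE_CTX) and splatted back out afterwards (SPLAT_CTX).
 */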
2148static void decode_coeffs(AVCodecContext *ctx)
2149{
2150 VP9Context *s = ctx->priv_data;
2151 VP9Block *b = s->b;
2152 int row = s->row, col = s->col;
2153 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2154 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2155 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2156 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2157 int end_x = FFMIN(2 * (s->cols - col), w4);
2158 int end_y = FFMIN(2 * (s->rows - row), h4);
2159 int n, pl, x, y, res;
2160 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2161 int tx = 4 * s->lossless + b->tx;
2162 const int16_t * const *yscans = vp9_scans[tx];
2163 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2164 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2165 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2166 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2167 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2168 static const int16_t band_counts[4][8] = {
2169 { 1, 2, 3, 4, 3, 16 - 13 },
2170 { 1, 2, 3, 4, 11, 64 - 21 },
2171 { 1, 2, 3, 4, 11, 256 - 21 },
2172 { 1, 2, 3, 4, 11, 1024 - 21 },
2173 };
2174 const int16_t *y_band_counts = band_counts[b->tx];
2175 const int16_t *uv_band_counts = band_counts[b->uvtx];
2176
2177#define MERGE(la, end, step, rd) \
2178 for (n = 0; n < end; n += step) \
2179 la[n] = !!rd(&la[n])
2180#define MERGE_CTX(step, rd) \
2181 do { \
2182 MERGE(l, end_y, step, rd); \
2183 MERGE(a, end_x, step, rd); \
2184 } while (0)
2185
2186#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2187 for (n = 0, y = 0; y < end_y; y += step) { \
2188 for (x = 0; x < end_x; x += step, n += step * step) { \
2189 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2190 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2191 c, e, p, a[x] + l[y], yscans[txtp], \
2192 ynbs[txtp], y_band_counts, qmul[0]); \
2193 a[x] = l[y] = !!res; \
2194 if (step >= 4) { \
2195 AV_WN16A(&s->eob[n], res); \
2196 } else { \
2197 s->eob[n] = res; \
2198 } \
2199 } \
2200 }
2201
2202#define SPLAT(la, end, step, cond) \
2203 if (step == 2) { \
2204 for (n = 1; n < end; n += step) \
2205 la[n] = la[n - 1]; \
2206 } else if (step == 4) { \
2207 if (cond) { \
2208 for (n = 0; n < end; n += step) \
2209 AV_WN32A(&la[n], la[n] * 0x01010101); \
2210 } else { \
2211 for (n = 0; n < end; n += step) \
2212 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2213 } \
2214 } else /* step == 8 */ { \
2215 if (cond) { \
2216 if (HAVE_FAST_64BIT) { \
2217 for (n = 0; n < end; n += step) \
2218 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2219 } else { \
2220 for (n = 0; n < end; n += step) { \
2221 uint32_t v32 = la[n] * 0x01010101; \
2222 AV_WN32A(&la[n], v32); \
2223 AV_WN32A(&la[n + 4], v32); \
2224 } \
2225 } \
2226 } else { \
2227 for (n = 0; n < end; n += step) \
2228 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2229 } \
2230 }
2231#define SPLAT_CTX(step) \
2232 do { \
2233 SPLAT(a, end_x, step, end_x == w4); \
2234 SPLAT(l, end_y, step, end_y == h4); \
2235 } while (0)
2236
2237 /* y tokens */
2238 switch (b->tx) {
2239 case TX_4X4:
2240 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2241 break;
2242 case TX_8X8:
2243 MERGE_CTX(2, AV_RN16A);
2244 DECODE_Y_COEF_LOOP(2, 0,);
2245 SPLAT_CTX(2);
2246 break;
2247 case TX_16X16:
2248 MERGE_CTX(4, AV_RN32A);
2249 DECODE_Y_COEF_LOOP(4, 0,);
2250 SPLAT_CTX(4);
2251 break;
2252 case TX_32X32:
2253 MERGE_CTX(8, AV_RN64A);
2254 DECODE_Y_COEF_LOOP(8, 0, 32);
2255 SPLAT_CTX(8);
2256 break;
2257 }
2258
2259#define DECODE_UV_COEF_LOOP(step) \
2260 for (n = 0, y = 0; y < end_y; y += step) { \
2261 for (x = 0; x < end_x; x += step, n += step * step) { \
2262 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2263 16 * step * step, c, e, p, a[x] + l[y], \
2264 uvscan, uvnb, uv_band_counts, qmul[1]); \
2265 a[x] = l[y] = !!res; \
2266 if (step >= 4) { \
2267 AV_WN16A(&s->uveob[pl][n], res); \
2268 } else { \
2269 s->uveob[pl][n] = res; \
2270 } \
2271 } \
2272 }
2273
2274 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2275 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2276 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2277 w4 >>= 1;
2278 h4 >>= 1;
2279 end_x >>= 1;
2280 end_y >>= 1;
2281 for (pl = 0; pl < 2; pl++) {
2282 a = &s->above_uv_nnz_ctx[pl][col];
2283 l = &s->left_uv_nnz_ctx[pl][row & 7];
2284 switch (b->uvtx) {
2285 case TX_4X4:
2286 DECODE_UV_COEF_LOOP(1);
2287 break;
2288 case TX_8X8:
2289 MERGE_CTX(2, AV_RN16A);
2290 DECODE_UV_COEF_LOOP(2);
2291 SPLAT_CTX(2);
2292 break;
2293 case TX_16X16:
2294 MERGE_CTX(4, AV_RN32A);
2295 DECODE_UV_COEF_LOOP(4);
2296 SPLAT_CTX(4);
2297 break;
2298 case TX_32X32:
2299 MERGE_CTX(8, AV_RN64A);
            // a 64x64 (max) uv block can only ever contain one tx32x32 block,
            // so there is no need to loop
2302 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2303 1024, c, e, p, a[0] + l[0],
2304 uvscan, uvnb, uv_band_counts, qmul[1]);
2305 a[0] = l[0] = !!res;
2306 AV_WN16A(&s->uveob[pl][0], res);
2307 SPLAT_CTX(8);
2308 break;
2309 }
2310 }
2311}
2312
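/*
 * Prepare the top and left edge pixels for intra prediction of one transform
 * block and return the effective prediction mode. Modes that need
 * unavailable neighbours are converted (mode_conv) to a DC variant, and
 * missing edge pixels are synthesized, either from the fixed 127/129 values
 * or by replicating the last available pixel.
 */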
2313static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2314 uint8_t *dst_edge, ptrdiff_t stride_edge,
2315 uint8_t *dst_inner, ptrdiff_t stride_inner,
2316 uint8_t *l, int col, int x, int w,
2317 int row, int y, enum TxfmMode tx,
2318 int p)
2319{
2320 int have_top = row > 0 || y > 0;
2321 int have_left = col > s->tiling.tile_col_start || x > 0;
2322 int have_right = x < w - 1;
2323 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2324 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2325 { DC_127_PRED, VERT_PRED } },
2326 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2327 { HOR_PRED, HOR_PRED } },
2328 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2329 { LEFT_DC_PRED, DC_PRED } },
2330 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2331 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2332 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2333 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2334 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2335 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2336 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2337 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2338 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2339 { DC_127_PRED, VERT_LEFT_PRED } },
2340 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2341 { HOR_UP_PRED, HOR_UP_PRED } },
2342 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2343 { HOR_PRED, TM_VP8_PRED } },
2344 };
2345 static const struct {
2346 uint8_t needs_left:1;
2347 uint8_t needs_top:1;
2348 uint8_t needs_topleft:1;
2349 uint8_t needs_topright:1;
2350 } edges[N_INTRA_PRED_MODES] = {
2351 [VERT_PRED] = { .needs_top = 1 },
2352 [HOR_PRED] = { .needs_left = 1 },
2353 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2354 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2355 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2356 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2357 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2358 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2359 [HOR_UP_PRED] = { .needs_left = 1 },
2360 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2361 [LEFT_DC_PRED] = { .needs_left = 1 },
2362 [TOP_DC_PRED] = { .needs_top = 1 },
2363 [DC_128_PRED] = { 0 },
2364 [DC_127_PRED] = { 0 },
2365 [DC_129_PRED] = { 0 }
2366 };
2367
2368 av_assert2(mode >= 0 && mode < 10);
2369 mode = mode_conv[mode][have_left][have_top];
2370 if (edges[mode].needs_top) {
2371 uint8_t *top, *topleft;
2372 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2373 int n_px_need_tr = 0;
2374
2375 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2376 n_px_need_tr = 4;
2377
2378 // if top of sb64-row, use s->intra_pred_data[] instead of
2379 // dst[-stride] for intra prediction (it contains pre- instead of
2380 // post-loopfilter data)
2381 if (have_top) {
2382 top = !(row & 7) && !y ?
2383 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2384 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2385 if (have_left)
2386 topleft = !(row & 7) && !y ?
2387 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2388 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2389 &dst_inner[-stride_inner];
2390 }
2391
2392 if (have_top &&
2393 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2394 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2395 n_px_need + n_px_need_tr <= n_px_have) {
2396 *a = top;
2397 } else {
2398 if (have_top) {
2399 if (n_px_need <= n_px_have) {
2400 memcpy(*a, top, n_px_need);
2401 } else {
2402 memcpy(*a, top, n_px_have);
2403 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2404 n_px_need - n_px_have);
2405 }
2406 } else {
2407 memset(*a, 127, n_px_need);
2408 }
2409 if (edges[mode].needs_topleft) {
2410 if (have_left && have_top) {
2411 (*a)[-1] = topleft[-1];
2412 } else {
2413 (*a)[-1] = have_top ? 129 : 127;
2414 }
2415 }
2416 if (tx == TX_4X4 && edges[mode].needs_topright) {
2417 if (have_top && have_right &&
2418 n_px_need + n_px_need_tr <= n_px_have) {
2419 memcpy(&(*a)[4], &top[4], 4);
2420 } else {
2421 memset(&(*a)[4], (*a)[3], 4);
2422 }
2423 }
2424 }
2425 }
2426 if (edges[mode].needs_left) {
2427 if (have_left) {
2428 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2429 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2430 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2431
2432 if (n_px_need <= n_px_have) {
2433 for (i = 0; i < n_px_need; i++)
2434 l[n_px_need - 1 - i] = dst[i * stride - 1];
2435 } else {
2436 for (i = 0; i < n_px_have; i++)
2437 l[n_px_need - 1 - i] = dst[i * stride - 1];
2438 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2439 }
2440 } else {
2441 memset(l, 129, 4 << tx);
2442 }
2443 }
2444
2445 return mode;
2446}
2447
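/*
 * Intra reconstruction: for each transform block, run the edge fixup
 * (check_intra_mode), the intra predictor, and, unless the block is skipped,
 * the inverse transform and add, for luma and both chroma planes.
 */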
2448static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2449{
2450 VP9Context *s = ctx->priv_data;
2451 VP9Block *b = s->b;
2452 int row = s->row, col = s->col;
2453 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2454 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2455 int end_x = FFMIN(2 * (s->cols - col), w4);
2456 int end_y = FFMIN(2 * (s->rows - row), h4);
2457 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2458 int uvstep1d = 1 << b->uvtx, p;
2459 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2460 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2461 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2462
2463 for (n = 0, y = 0; y < end_y; y += step1d) {
2464 uint8_t *ptr = dst, *ptr_r = dst_r;
2465 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2466 ptr_r += 4 * step1d, n += step) {
2467 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2468 y * 2 + x : 0];
2469 uint8_t *a = &a_buf[32];
2470 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2471 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2472
2473 mode = check_intra_mode(s, mode, &a, ptr_r,
2474 s->frames[CUR_FRAME].tf.f->linesize[0],
2475 ptr, s->y_stride, l,
2476 col, x, w4, row, y, b->tx, 0);
2477 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2478 if (eob)
2479 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2480 s->block + 16 * n, eob);
2481 }
2482 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2483 dst += 4 * step1d * s->y_stride;
2484 }
2485
2486 // U/V
2487 w4 >>= 1;
2488 end_x >>= 1;
2489 end_y >>= 1;
2490 step = 1 << (b->uvtx * 2);
2491 for (p = 0; p < 2; p++) {
2492 dst = s->dst[1 + p];
2493 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2494 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2495 uint8_t *ptr = dst, *ptr_r = dst_r;
2496 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2497 ptr_r += 4 * uvstep1d, n += step) {
2498 int mode = b->uvmode;
2499 uint8_t *a = &a_buf[16];
2500 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2501
2502 mode = check_intra_mode(s, mode, &a, ptr_r,
2503 s->frames[CUR_FRAME].tf.f->linesize[1],
2504 ptr, s->uv_stride, l,
2505 col, x, w4, row, y, b->uvtx, p + 1);
2506 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2507 if (eob)
2508 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2509 s->uvblock[p] + 16 * n, eob);
2510 }
2511 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2512 dst += 4 * uvstep1d * s->uv_stride;
2513 }
2514 }
2515}
2516
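/*
 * Luma motion compensation for one prediction direction. MVs are in 1/8-pel
 * units. The call waits until the referenced rows of the source frame have
 * been decoded (frame threading) and falls back to emulated_edge_mc() into a
 * stride-80 temporary buffer whenever the subpel filter would read outside
 * the reference frame.
 */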
2517static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2518 uint8_t *dst, ptrdiff_t dst_stride,
2519 const uint8_t *ref, ptrdiff_t ref_stride,
2520 ThreadFrame *ref_frame,
2521 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2522 int bw, int bh, int w, int h)
2523{
2524 int mx = mv->x, my = mv->y, th;
2525
2526 y += my >> 3;
2527 x += mx >> 3;
2528 ref += y * ref_stride + x;
2529 mx &= 7;
2530 my &= 7;
2531 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2532 // we use +7 because the last 7 pixels of each sbrow can be changed in
2533 // the longest loopfilter of the next sbrow
2534 th = (y + bh + 4 * !!my + 7) >> 6;
2535 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2536 if (x < !!mx * 3 || y < !!my * 3 ||
2537 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2538 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2539 ref - !!my * 3 * ref_stride - !!mx * 3,
2540 80, ref_stride,
2541 bw + !!mx * 7, bh + !!my * 7,
2542 x - !!mx * 3, y - !!my * 3, w, h);
2543 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2544 ref_stride = 80;
2545 }
2546 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2547}
2548
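/*
 * Chroma counterpart of mc_luma_dir(); one call handles both the U and V
 * planes. Chroma MVs have 1/16-pel precision, since the caller passes the
 * luma MV unscaled while the block coordinates are in half resolution.
 */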
2549static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2550 uint8_t *dst_u, uint8_t *dst_v,
2551 ptrdiff_t dst_stride,
2552 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2553 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2554 ThreadFrame *ref_frame,
2555 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2556 int bw, int bh, int w, int h)
2557{
2558 int mx = mv->x, my = mv->y, th;
2559
2560 y += my >> 4;
2561 x += mx >> 4;
2562 ref_u += y * src_stride_u + x;
2563 ref_v += y * src_stride_v + x;
2564 mx &= 15;
2565 my &= 15;
2566 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2567 // we use +7 because the last 7 pixels of each sbrow can be changed in
2568 // the longest loopfilter of the next sbrow
2569 th = (y + bh + 4 * !!my + 7) >> 5;
2570 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2571 if (x < !!mx * 3 || y < !!my * 3 ||
2572 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2573 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2574 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2575 80, src_stride_u,
2576 bw + !!mx * 7, bh + !!my * 7,
2577 x - !!mx * 3, y - !!my * 3, w, h);
2578 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2579 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2580
2581 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2582 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2583 80, src_stride_v,
2584 bw + !!mx * 7, bh + !!my * 7,
2585 x - !!mx * 3, y - !!my * 3, w, h);
2586 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2587 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2588 } else {
2589 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2590 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2591 }
2592}
2593
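/*
 * Inter reconstruction: luma motion compensation (done per sub-block for
 * sizes below 8x8), chroma motion compensation (using the rounded average of
 * the four sub-block MVs where applicable), then the residual inverse
 * transform and add for non-skipped blocks.
 */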
2594static void inter_recon(AVCodecContext *ctx)
2595{
2596 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2597 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2598 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2599 };
2600 VP9Context *s = ctx->priv_data;
2601 VP9Block *b = s->b;
2602 int row = s->row, col = s->col;
2603 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2604 AVFrame *ref1 = tref1->f, *ref2;
2605 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2606 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2607
2608 if (b->comp) {
2609 tref2 = &s->refs[s->refidx[b->ref[1]]];
2610 ref2 = tref2->f;
2611 w2 = ref2->width;
2612 h2 = ref2->height;
2613 }
2614
2615 // y inter pred
2616 if (b->bs > BS_8x8) {
2617 if (b->bs == BS_8x4) {
2618 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2619 ref1->data[0], ref1->linesize[0], tref1,
2620 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2621 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2622 s->dst[0] + 4 * ls_y, ls_y,
2623 ref1->data[0], ref1->linesize[0], tref1,
2624 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
2625
2626 if (b->comp) {
2627 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2628 ref2->data[0], ref2->linesize[0], tref2,
2629 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2630 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2631 s->dst[0] + 4 * ls_y, ls_y,
2632 ref2->data[0], ref2->linesize[0], tref2,
2633 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2634 }
2635 } else if (b->bs == BS_4x8) {
2636 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2637 ref1->data[0], ref1->linesize[0], tref1,
2638 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2639 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2640 ref1->data[0], ref1->linesize[0], tref1,
2641 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2642
2643 if (b->comp) {
2644 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2645 ref2->data[0], ref2->linesize[0], tref2,
2646 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2647 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2648 ref2->data[0], ref2->linesize[0], tref2,
2649 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2650 }
2651 } else {
2652 av_assert2(b->bs == BS_4x4);
2653
2654 // FIXME if two horizontally adjacent blocks have the same MV,
2655 // do a w8 instead of a w4 call
2656 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2657 ref1->data[0], ref1->linesize[0], tref1,
2658 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2659 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2660 ref1->data[0], ref1->linesize[0], tref1,
2661 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2662 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2663 s->dst[0] + 4 * ls_y, ls_y,
2664 ref1->data[0], ref1->linesize[0], tref1,
2665 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2666 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2667 s->dst[0] + 4 * ls_y + 4, ls_y,
2668 ref1->data[0], ref1->linesize[0], tref1,
2669 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2670
2671 if (b->comp) {
2672 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2673 ref2->data[0], ref2->linesize[0], tref2,
2674 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2675 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2676 ref2->data[0], ref2->linesize[0], tref2,
2677 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2678 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2679 s->dst[0] + 4 * ls_y, ls_y,
2680 ref2->data[0], ref2->linesize[0], tref2,
2681 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2682 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2683 s->dst[0] + 4 * ls_y + 4, ls_y,
2684 ref2->data[0], ref2->linesize[0], tref2,
2685 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
2686 }
2687 }
2688 } else {
2689 int bwl = bwlog_tab[0][b->bs];
2690 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2691
2692 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2693 ref1->data[0], ref1->linesize[0], tref1,
                    row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);
2695
2696 if (b->comp)
2697 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2698 ref2->data[0], ref2->linesize[0], tref2,
2699 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
2700 }
2701
2702 // uv inter pred
2703 {
2704 int bwl = bwlog_tab[1][b->bs];
2705 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2706 VP56mv mvuv;
2707
2708 w1 = (w1 + 1) >> 1;
2709 h1 = (h1 + 1) >> 1;
2710 if (b->comp) {
2711 w2 = (w2 + 1) >> 1;
2712 h2 = (h2 + 1) >> 1;
2713 }
2714 if (b->bs > BS_8x8) {
2715 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2716 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2717 } else {
2718 mvuv = b->mv[0][0];
2719 }
2720
2721 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2722 s->dst[1], s->dst[2], ls_uv,
2723 ref1->data[1], ref1->linesize[1],
2724 ref1->data[2], ref1->linesize[2], tref1,
2725 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2726
2727 if (b->comp) {
2728 if (b->bs > BS_8x8) {
2729 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2730 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2731 } else {
2732 mvuv = b->mv[0][1];
2733 }
2734 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2735 s->dst[1], s->dst[2], ls_uv,
2736 ref2->data[1], ref2->linesize[1],
2737 ref2->data[2], ref2->linesize[2], tref2,
2738 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2739 }
2740 }
2741
2742 if (!b->skip) {
        /* mostly copied from intra_recon() */
2744
2745 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2746 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2747 int end_x = FFMIN(2 * (s->cols - col), w4);
2748 int end_y = FFMIN(2 * (s->rows - row), h4);
2749 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2750 int uvstep1d = 1 << b->uvtx, p;
2751 uint8_t *dst = s->dst[0];
2752
2753 // y itxfm add
2754 for (n = 0, y = 0; y < end_y; y += step1d) {
2755 uint8_t *ptr = dst;
2756 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2757 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2758
2759 if (eob)
2760 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2761 s->block + 16 * n, eob);
2762 }
2763 dst += 4 * s->y_stride * step1d;
2764 }
2765
2766 // uv itxfm add
2767 end_x >>= 1;
2768 end_y >>= 1;
2769 step = 1 << (b->uvtx * 2);
2770 for (p = 0; p < 2; p++) {
2771 dst = s->dst[p + 1];
2772 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2773 uint8_t *ptr = dst;
2774 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2775 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2776
2777 if (eob)
2778 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2779 s->uvblock[p] + 16 * n, eob);
2780 }
2781 dst += 4 * uvstep1d * s->uv_stride;
2782 }
2783 }
2784 }
2785}
2786
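/*
 * Record, per 8-pixel row of the current superblock, which block edges need
 * loopfiltering and at which filter width (16/8/4/inner-4) by setting bits
 * in lflvl->mask. Skipped inter blocks only get their outer border edges
 * filtered; all other blocks also get their inner transform edges.
 */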
2787static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2788 int row_and_7, int col_and_7,
2789 int w, int h, int col_end, int row_end,
2790 enum TxfmMode tx, int skip_inter)
2791{
2792 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2793 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2794 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2795 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2796
2797 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2798 // edges. This means that for UV, we work on two subsampled blocks at
2799 // a time, and we only use the topleft block's mode information to set
2800 // things like block strength. Thus, for any block size smaller than
2801 // 16x16, ignore the odd portion of the block.
2802 if (tx == TX_4X4 && is_uv) {
2803 if (h == 1) {
2804 if (row_and_7 & 1)
2805 return;
2806 if (!row_end)
2807 h += 1;
2808 }
2809 if (w == 1) {
2810 if (col_and_7 & 1)
2811 return;
2812 if (!col_end)
2813 w += 1;
2814 }
2815 }
2816
2817 if (tx == TX_4X4 && !skip_inter) {
2818 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2819 int m_col_odd = (t << (w - 1)) - t;
2820
2821 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2822 if (is_uv) {
2823 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2824
2825 for (y = row_and_7; y < h + row_and_7; y++) {
2826 int col_mask_id = 2 - !(y & 7);
2827
2828 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2829 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2830 // for odd lines, if the odd col is not being filtered,
2831 // skip odd row also:
2832 // .---. <-- a
2833 // | |
2834 // |___| <-- b
2835 // ^ ^
2836 // c d
2837 //
2838 // if a/c are even row/col and b/d are odd, and d is skipped,
2839 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2840 if ((col_end & 1) && (y & 1)) {
2841 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2842 } else {
2843 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2844 }
2845 }
2846 } else {
2847 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2848
2849 for (y = row_and_7; y < h + row_and_7; y++) {
2850 int col_mask_id = 2 - !(y & 3);
2851
2852 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2853 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2854 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2855 lflvl->mask[is_uv][0][y][3] |= m_col;
2856 lflvl->mask[is_uv][1][y][3] |= m_col;
2857 }
2858 }
2859 } else {
2860 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2861
2862 if (!skip_inter) {
2863 int mask_id = (tx == TX_8X8);
2864 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2865 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2866 int m_row = m_col & masks[l2];
2867
            // at odd UV col/row tx16/tx32 loopfilter edges, force the 8-wide
            // loopfilter to prevent going off the visible edge
2870 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2871 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2872 int m_row_8 = m_row - m_row_16;
2873
2874 for (y = row_and_7; y < h + row_and_7; y++) {
2875 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2876 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2877 }
2878 } else {
2879 for (y = row_and_7; y < h + row_and_7; y++)
2880 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2881 }
2882
2883 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2884 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2885 lflvl->mask[is_uv][1][y][0] |= m_col;
2886 if (y - row_and_7 == h - 1)
2887 lflvl->mask[is_uv][1][y][1] |= m_col;
2888 } else {
2889 for (y = row_and_7; y < h + row_and_7; y += step1d)
2890 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2891 }
2892 } else if (tx != TX_4X4) {
2893 int mask_id;
2894
2895 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2896 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2897 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2898 for (y = row_and_7; y < h + row_and_7; y++)
2899 lflvl->mask[is_uv][0][y][mask_id] |= t;
2900 } else if (is_uv) {
2901 int t8 = t & 0x01, t4 = t - t8;
2902
2903 for (y = row_and_7; y < h + row_and_7; y++) {
2904 lflvl->mask[is_uv][0][y][2] |= t4;
2905 lflvl->mask[is_uv][0][y][1] |= t8;
2906 }
2907 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2908 } else {
2909 int t8 = t & 0x11, t4 = t - t8;
2910
2911 for (y = row_and_7; y < h + row_and_7; y++) {
2912 lflvl->mask[is_uv][0][y][2] |= t4;
2913 lflvl->mask[is_uv][0][y][1] |= t8;
2914 }
2915 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2916 }
2917 }
2918}
2919
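/*
 * Decode and reconstruct one block: mode and coefficient parsing (in decoding
 * passes below 2), intra or inter reconstruction, write-back of emulated
 * edge overhangs, and the loopfilter level/mask bookkeeping consumed later
 * by loopfilter_sb().
 */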
2920static void decode_b(AVCodecContext *ctx, int row, int col,
2921 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2922 enum BlockLevel bl, enum BlockPartition bp)
2923{
2924 VP9Context *s = ctx->priv_data;
2925 VP9Block *b = s->b;
2926 enum BlockSize bs = bl * 3 + bp;
2927 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2928 int emu[2];
2929 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2930
2931 s->row = row;
2932 s->row7 = row & 7;
2933 s->col = col;
2934 s->col7 = col & 7;
2935 s->min_mv.x = -(128 + col * 64);
2936 s->min_mv.y = -(128 + row * 64);
2937 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2938 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2939 if (s->pass < 2) {
2940 b->bs = bs;
2941 b->bl = bl;
2942 b->bp = bp;
2943 decode_mode(ctx);
2944 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2945
2946 if (!b->skip) {
2947 decode_coeffs(ctx);
2948 } else {
2949 int row7 = s->row7;
2950
2951#define SPLAT_ZERO_CTX(v, n) \
2952 switch (n) { \
2953 case 1: v = 0; break; \
2954 case 2: AV_ZERO16(&v); break; \
2955 case 4: AV_ZERO32(&v); break; \
2956 case 8: AV_ZERO64(&v); break; \
2957 case 16: AV_ZERO128(&v); break; \
2958 }
2959#define SPLAT_ZERO_YUV(dir, var, off, n) \
2960 do { \
2961 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2962 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2963 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2964 } while (0)
2965
2966 switch (w4) {
2967 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2968 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2969 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2970 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2971 }
2972 switch (h4) {
2973 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2974 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2975 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2976 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
2977 }
2978 }
2979 if (s->pass == 1) {
2980 s->b++;
2981 s->block += w4 * h4 * 64;
2982 s->uvblock[0] += w4 * h4 * 16;
2983 s->uvblock[1] += w4 * h4 * 16;
2984 s->eob += 4 * w4 * h4;
2985 s->uveob[0] += w4 * h4;
2986 s->uveob[1] += w4 * h4;
2987
2988 return;
2989 }
2990 }
2991
    // use emulated overhangs if the stride of the target buffer can't hold
    // them; this allows us to support emu-edge and so on even with large
    // block overhangs
2995 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2996 (row + h4) > s->rows;
2997 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2998 (row + h4) > s->rows;
2999 if (emu[0]) {
3000 s->dst[0] = s->tmp_y;
3001 s->y_stride = 64;
3002 } else {
3003 s->dst[0] = f->data[0] + yoff;
3004 s->y_stride = f->linesize[0];
3005 }
3006 if (emu[1]) {
3007 s->dst[1] = s->tmp_uv[0];
3008 s->dst[2] = s->tmp_uv[1];
3009 s->uv_stride = 32;
3010 } else {
3011 s->dst[1] = f->data[1] + uvoff;
3012 s->dst[2] = f->data[2] + uvoff;
3013 s->uv_stride = f->linesize[1];
3014 }
3015 if (b->intra) {
3016 intra_recon(ctx, yoff, uvoff);
3017 } else {
3018 inter_recon(ctx);
3019 }
3020 if (emu[0]) {
3021 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3022
3023 for (n = 0; o < w; n++) {
3024 int bw = 64 >> n;
3025
3026 av_assert2(n <= 4);
3027 if (w & bw) {
3028 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3029 s->tmp_y + o, 64, h, 0, 0);
3030 o += bw;
3031 }
3032 }
3033 }
3034 if (emu[1]) {
3035 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3036
3037 for (n = 1; o < w; n++) {
3038 int bw = 64 >> n;
3039
3040 av_assert2(n <= 4);
3041 if (w & bw) {
3042 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3043 s->tmp_uv[0] + o, 32, h, 0, 0);
3044 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3045 s->tmp_uv[1] + o, 32, h, 0, 0);
3046 o += bw;
3047 }
3048 }
3049 }
3050
3051 // pick filter level and find edges to apply filter to
3052 if (s->filter.level &&
3053 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3054 [b->mode[3] != ZEROMV]) > 0) {
3055 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3056 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3057
3058 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3059 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3060 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3061 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3062 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3063 b->uvtx, skip_inter);
3064
3065 if (!s->filter.lim_lut[lvl]) {
3066 int sharp = s->filter.sharpness;
3067 int limit = lvl;
3068
3069 if (sharp > 0) {
3070 limit >>= (sharp + 3) >> 2;
3071 limit = FFMIN(limit, 9 - sharp);
3072 }
3073 limit = FFMAX(limit, 1);
3074
3075 s->filter.lim_lut[lvl] = limit;
3076 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3077 }
3078 }
3079
3080 if (s->pass == 2) {
3081 s->b++;
3082 s->block += w4 * h4 * 64;
3083 s->uvblock[0] += w4 * h4 * 16;
3084 s->uvblock[1] += w4 * h4 * 16;
3085 s->eob += 4 * w4 * h4;
3086 s->uveob[0] += w4 * h4;
3087 s->uveob[1] += w4 * h4;
3088 }
3089}
3090
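/*
 * Recursively parse the partition tree below one superblock node and decode
 * the resulting blocks. At frame borders, partitions that cannot fit are
 * partially inferred, which is why not every split decision reads a full
 * partition symbol.
 */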
3091static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3092 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3093{
3094 VP9Context *s = ctx->priv_data;
3095 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3096 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3097 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3098 s->prob.p.partition[bl][c];
3099 enum BlockPartition bp;
3100 ptrdiff_t hbs = 4 >> bl;
3101 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3102 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3103
3104 if (bl == BL_8X8) {
3105 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3106 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3107 } else if (col + hbs < s->cols) { // FIXME why not <=?
3108 if (row + hbs < s->rows) { // FIXME why not <=?
3109 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3110 switch (bp) {
3111 case PARTITION_NONE:
3112 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3113 break;
3114 case PARTITION_H:
3115 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3116 yoff += hbs * 8 * y_stride;
3117 uvoff += hbs * 4 * uv_stride;
3118 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3119 break;
3120 case PARTITION_V:
3121 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3122 yoff += hbs * 8;
3123 uvoff += hbs * 4;
3124 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3125 break;
3126 case PARTITION_SPLIT:
3127 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3128 decode_sb(ctx, row, col + hbs, lflvl,
3129 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3130 yoff += hbs * 8 * y_stride;
3131 uvoff += hbs * 4 * uv_stride;
3132 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3133 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3134 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3135 break;
3136 default:
3137 av_assert0(0);
3138 }
3139 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3140 bp = PARTITION_SPLIT;
3141 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3142 decode_sb(ctx, row, col + hbs, lflvl,
3143 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3144 } else {
3145 bp = PARTITION_H;
3146 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3147 }
3148 } else if (row + hbs < s->rows) { // FIXME why not <=?
3149 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3150 bp = PARTITION_SPLIT;
3151 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3152 yoff += hbs * 8 * y_stride;
3153 uvoff += hbs * 4 * uv_stride;
3154 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3155 } else {
3156 bp = PARTITION_V;
3157 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3158 }
3159 } else {
3160 bp = PARTITION_SPLIT;
3161 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3162 }
3163 s->counts.partition[bl][c][bp]++;
3164}
3165
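/*
 * Second-pass counterpart of decode_sb(): instead of parsing the partition
 * tree from the bitstream again, replay the block structure that the first
 * pass stored in s->b.
 */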
3166static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3167 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3168{
3169 VP9Context *s = ctx->priv_data;
3170 VP9Block *b = s->b;
3171 ptrdiff_t hbs = 4 >> bl;
3172 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3173 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3174
3175 if (bl == BL_8X8) {
3176 av_assert2(b->bl == BL_8X8);
3177 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3178 } else if (s->b->bl == bl) {
3179 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3180 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3181 yoff += hbs * 8 * y_stride;
3182 uvoff += hbs * 4 * uv_stride;
3183 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3184 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3185 yoff += hbs * 8;
3186 uvoff += hbs * 4;
3187 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3188 }
3189 } else {
3190 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3191 if (col + hbs < s->cols) { // FIXME why not <=?
3192 if (row + hbs < s->rows) {
3193 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3194 uvoff + 4 * hbs, bl + 1);
3195 yoff += hbs * 8 * y_stride;
3196 uvoff += hbs * 4 * uv_stride;
3197 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3198 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3199 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3200 } else {
3201 yoff += hbs * 8;
3202 uvoff += hbs * 4;
3203 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3204 }
3205 } else if (row + hbs < s->rows) {
3206 yoff += hbs * 8 * y_stride;
3207 uvoff += hbs * 4 * uv_stride;
3208 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3209 }
3210 }
3211}
3212
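/*
 * Apply the in-loop deblocking filter to one 64x64 superblock, using the
 * edge masks and per-block filter levels collected during decode_b():
 * vertical (between-column) edges first, then horizontal (between-row)
 * edges, luma before chroma.
 */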
3213static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3214 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3215{
3216 VP9Context *s = ctx->priv_data;
3217 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3218 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3219 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3220 int y, x, p;
3221
    // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on an 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8-pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)
3227
3228 // filter edges between columns, Y plane (e.g. block1 | block2)
3229 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3230 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3231 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3232 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3233 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3234 unsigned hm = hm1 | hm2 | hm13 | hm23;
3235
3236 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3237 if (hm1 & x) {
3238 int L = *l, H = L >> 4;
3239 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3240
3241 if (col || x > 1) {
3242 if (hmask1[0] & x) {
3243 if (hmask2[0] & x) {
3244 av_assert2(l[8] == L);
3245 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3246 } else {
3247 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3248 }
3249 } else if (hm2 & x) {
3250 L = l[8];
3251 H |= (L >> 4) << 8;
3252 E |= s->filter.mblim_lut[L] << 8;
3253 I |= s->filter.lim_lut[L] << 8;
3254 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3255 [!!(hmask2[1] & x)]
3256 [0](ptr, ls_y, E, I, H);
3257 } else {
3258 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3259 [0](ptr, ls_y, E, I, H);
3260 }
3261 }
3262 } else if (hm2 & x) {
3263 int L = l[8], H = L >> 4;
3264 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3265
3266 if (col || x > 1) {
3267 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3268 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3269 }
3270 }
3271 if (hm13 & x) {
3272 int L = *l, H = L >> 4;
3273 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3274
3275 if (hm23 & x) {
3276 L = l[8];
3277 H |= (L >> 4) << 8;
3278 E |= s->filter.mblim_lut[L] << 8;
3279 I |= s->filter.lim_lut[L] << 8;
3280 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3281 } else {
3282 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3283 }
3284 } else if (hm23 & x) {
3285 int L = l[8], H = L >> 4;
3286 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3287
3288 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3289 }
3290 }
3291 }
3292
3293 // block1
3294 // filter edges between rows, Y plane (e.g. ------)
3295 // block2
3296 dst = f->data[0] + yoff;
3297 lvl = lflvl->level;
3298 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3299 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3300 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3301
3302 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3303 if (row || y) {
3304 if (vm & x) {
3305 int L = *l, H = L >> 4;
3306 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3307
3308 if (vmask[0] & x) {
3309 if (vmask[0] & (x << 1)) {
3310 av_assert2(l[1] == L);
3311 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3312 } else {
3313 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3314 }
3315 } else if (vm & (x << 1)) {
3316 L = l[1];
3317 H |= (L >> 4) << 8;
3318 E |= s->filter.mblim_lut[L] << 8;
3319 I |= s->filter.lim_lut[L] << 8;
3320 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3321 [!!(vmask[1] & (x << 1))]
3322 [1](ptr, ls_y, E, I, H);
3323 } else {
3324 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3325 [1](ptr, ls_y, E, I, H);
3326 }
3327 } else if (vm & (x << 1)) {
3328 int L = l[1], H = L >> 4;
3329 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3330
3331 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3332 [1](ptr + 8, ls_y, E, I, H);
3333 }
3334 }
3335 if (vm3 & x) {
3336 int L = *l, H = L >> 4;
3337 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3338
3339 if (vm3 & (x << 1)) {
3340 L = l[1];
3341 H |= (L >> 4) << 8;
3342 E |= s->filter.mblim_lut[L] << 8;
3343 I |= s->filter.lim_lut[L] << 8;
3344 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3345 } else {
3346 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3347 }
3348 } else if (vm3 & (x << 1)) {
3349 int L = l[1], H = L >> 4;
3350 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3351
3352 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3353 }
3354 }
3355 }
3356
3357 // same principle but for U/V planes
3358 for (p = 0; p < 2; p++) {
3359 lvl = lflvl->level;
3360 dst = f->data[1 + p] + uvoff;
3361 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3362 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3363 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3364 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3365 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3366
3367 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3368 if (col || x > 1) {
3369 if (hm1 & x) {
3370 int L = *l, H = L >> 4;
3371 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3372
3373 if (hmask1[0] & x) {
3374 if (hmask2[0] & x) {
3375 av_assert2(l[16] == L);
3376 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3377 } else {
3378 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3379 }
3380 } else if (hm2 & x) {
3381 L = l[16];
3382 H |= (L >> 4) << 8;
3383 E |= s->filter.mblim_lut[L] << 8;
3384 I |= s->filter.lim_lut[L] << 8;
3385 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3386 [!!(hmask2[1] & x)]
3387 [0](ptr, ls_uv, E, I, H);
3388 } else {
3389 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3390 [0](ptr, ls_uv, E, I, H);
3391 }
3392 } else if (hm2 & x) {
3393 int L = l[16], H = L >> 4;
3394 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3395
3396 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3397 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3398 }
3399 }
3400 if (x & 0xAA)
3401 l += 2;
3402 }
3403 }
3404 lvl = lflvl->level;
3405 dst = f->data[1 + p] + uvoff;
3406 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3407 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3408 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3409
3410 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3411 if (row || y) {
3412 if (vm & x) {
3413 int L = *l, H = L >> 4;
3414 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3415
3416 if (vmask[0] & x) {
3417 if (vmask[0] & (x << 2)) {
3418 av_assert2(l[2] == L);
3419 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3420 } else {
3421 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3422 }
3423 } else if (vm & (x << 2)) {
3424 L = l[2];
3425 H |= (L >> 4) << 8;
3426 E |= s->filter.mblim_lut[L] << 8;
3427 I |= s->filter.lim_lut[L] << 8;
3428 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3429 [!!(vmask[1] & (x << 2))]
3430 [1](ptr, ls_uv, E, I, H);
3431 } else {
3432 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3433 [1](ptr, ls_uv, E, I, H);
3434 }
3435 } else if (vm & (x << 2)) {
3436 int L = l[2], H = L >> 4;
3437 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3438
3439 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3440 [1](ptr + 8, ls_uv, E, I, H);
3441 }
3442 }
3443 }
3444 if (y & 1)
3445 lvl += 16;
3446 }
3447 }
3448}
3449
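// Compute a tile's [start, end) extent in units of 8x8 blocks: idx and
// idx + 1 are scaled into the superblock range by the log2 tile-count
// shift, then converted from 64x64-superblock to 8x8-block units (<< 3).
// E.g. with n = 9 superblock columns split into two tile columns
// (log2_n = 1), tile 0 covers blocks [0, 32) and tile 1 covers [32, 72).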
3450static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3451{
3452 int sb_start = ( idx * n) >> log2_n;
3453 int sb_end = ((idx + 1) * n) >> log2_n;
3454 *start = FFMIN(sb_start, n) << 3;
3455 *end = FFMIN(sb_end, n) << 3;
3456}
3457
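// Backward adaptation of a single binary probability: p2 is the empirical
// probability round(256 * ct0 / ct), clipped to [1, 255], and *p is moved
// toward it with a weight that grows with the number of observed symbols,
// saturating at max_count. Illustrative example: *p = 128, ct0 = 6,
// ct1 = 2, max_count = 24, update_factor = 112 gives p2 = 192, an
// effective factor of 112 * 8 / 24 = 37, and a result of
// 128 + (((192 - 128) * 37 + 128) >> 8) = 137.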
3458static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3459 int max_count, int update_factor)
3460{
3461 unsigned ct = ct0 + ct1, p2, p1;
3462
3463 if (!ct)
3464 return;
3465
3466 p1 = *p;
3467 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3468 p2 = av_clip(p2, 1, 255);
3469 ct = FFMIN(ct, max_count);
3470 update_factor = FASTDIV(update_factor * ct, max_count);
3471
3472 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3473 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3474}
3475
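// After a frame is fully decoded, nudge the active frame context toward
// the symbol statistics gathered during decoding (VP9 backward
// adaptation). Coefficient probabilities normally use update factor 112,
// but the first inter frame after a keyframe adapts faster with 128;
// every other probability below uses factor 128, with counts saturating
// at 24 for coefficients and 20 for everything else.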
3476static void adapt_probs(VP9Context *s)
3477{
3478 int i, j, k, l, m;
3479 prob_context *p = &s->prob_ctx[s->framectxid].p;
3480 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3481
3482 // coefficients
3483 for (i = 0; i < 4; i++)
3484 for (j = 0; j < 2; j++)
3485 for (k = 0; k < 2; k++)
3486 for (l = 0; l < 6; l++)
3487 for (m = 0; m < 6; m++) {
3488 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3489 unsigned *e = s->counts.eob[i][j][k][l][m];
3490 unsigned *c = s->counts.coef[i][j][k][l][m];
3491
3492 if (l == 0 && m >= 3) // the dc band only has 3 coef contexts
3493 break;
3494
3495 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3496 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3497 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3498 }
3499
3500 if (s->keyframe || s->intraonly) {
3501 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3502 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3503 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3504 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3505 return;
3506 }
3507
3508 // skip flag
3509 for (i = 0; i < 3; i++)
3510 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3511
3512 // intra/inter flag
3513 for (i = 0; i < 4; i++)
3514 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3515
3516 // comppred flag
3517 if (s->comppredmode == PRED_SWITCHABLE) {
3518 for (i = 0; i < 5; i++)
3519 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3520 }
3521
3522 // reference frames
3523 if (s->comppredmode != PRED_SINGLEREF) {
3524 for (i = 0; i < 5; i++)
3525 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3526 s->counts.comp_ref[i][1], 20, 128);
3527 }
3528
3529 if (s->comppredmode != PRED_COMPREF) {
3530 for (i = 0; i < 5; i++) {
3531 uint8_t *pp = p->single_ref[i];
3532 unsigned (*c)[2] = s->counts.single_ref[i];
3533
3534 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3535 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3536 }
3537 }
3538
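// Each adapt_prob call below updates one internal node of a symbol tree:
// the first count is the node's left branch, the second the summed counts
// of its right subtree. For partitioning: node 0 is PARTITION_NONE vs the
// rest, node 1 horizontal vs {vertical, split}, node 2 vertical vs split.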
3539 // block partitioning
3540 for (i = 0; i < 4; i++)
3541 for (j = 0; j < 4; j++) {
3542 uint8_t *pp = p->partition[i][j];
3543 unsigned *c = s->counts.partition[i][j];
3544
3545 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3546 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3547 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3548 }
3549
3550 // tx size
3551 if (s->txfmmode == TX_SWITCHABLE) {
3552 for (i = 0; i < 2; i++) {
3553 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3554
3555 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3556 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3557 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3558 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3559 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3560 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3561 }
3562 }
3563
3564 // interpolation filter
3565 if (s->filtermode == FILTER_SWITCHABLE) {
3566 for (i = 0; i < 4; i++) {
3567 uint8_t *pp = p->filter[i];
3568 unsigned *c = s->counts.filter[i];
3569
3570 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3571 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3572 }
3573 }
3574
3575 // inter modes
3576 for (i = 0; i < 7; i++) {
3577 uint8_t *pp = p->mv_mode[i];
3578 unsigned *c = s->counts.mv_mode[i];
3579
3580 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3581 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3582 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3583 }
3584
3585 // mv joints
3586 {
3587 uint8_t *pp = p->mv_joint;
3588 unsigned *c = s->counts.mv_joint;
3589
3590 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3591 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3592 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3593 }
3594
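// Each MV component is coded as a sign bit, a magnitude class (a tree
// over 11 classes), per-class offset bits, a 1/4-pel fraction and an
// optional 1/8-pel high-precision bit; class 0 carries its own integer,
// fraction and hp probabilities. The hp probabilities are only adapted
// when the frame actually allowed high-precision MVs.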
3595 // mv components
3596 for (i = 0; i < 2; i++) {
3597 uint8_t *pp;
3598 unsigned *c, (*c2)[2], sum;
3599
3600 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3601 s->counts.mv_comp[i].sign[1], 20, 128);
3602
3603 pp = p->mv_comp[i].classes;
3604 c = s->counts.mv_comp[i].classes;
3605 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3606 adapt_prob(&pp[0], c[0], sum, 20, 128);
3607 sum -= c[1];
3608 adapt_prob(&pp[1], c[1], sum, 20, 128);
3609 sum -= c[2] + c[3];
3610 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3611 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3612 sum -= c[4] + c[5];
3613 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3614 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3615 sum -= c[6];
3616 adapt_prob(&pp[6], c[6], sum, 20, 128);
3617 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3618 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3619 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3620
3621 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3622 s->counts.mv_comp[i].class0[1], 20, 128);
3623 pp = p->mv_comp[i].bits;
3624 c2 = s->counts.mv_comp[i].bits;
3625 for (j = 0; j < 10; j++)
3626 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3627
3628 for (j = 0; j < 2; j++) {
3629 pp = p->mv_comp[i].class0_fp[j];
3630 c = s->counts.mv_comp[i].class0_fp[j];
3631 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3632 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3633 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3634 }
3635 pp = p->mv_comp[i].fp;
3636 c = s->counts.mv_comp[i].fp;
3637 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3638 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3639 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3640
3641 if (s->highprecisionmvs) {
3642 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3643 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3644 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3645 s->counts.mv_comp[i].hp[1], 20, 128);
3646 }
3647 }
3648
3649 // y intra modes
3650 for (i = 0; i < 4; i++) {
3651 uint8_t *pp = p->y_mode[i];
3652 unsigned *c = s->counts.y_mode[i], sum, s2;
3653
3654 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3655 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3656 sum -= c[TM_VP8_PRED];
3657 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3658 sum -= c[VERT_PRED];
3659 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3660 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3661 sum -= s2;
3662 adapt_prob(&pp[3], s2, sum, 20, 128);
3663 s2 -= c[HOR_PRED];
3664 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3665 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3666 sum -= c[DIAG_DOWN_LEFT_PRED];
3667 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3668 sum -= c[VERT_LEFT_PRED];
3669 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3670 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3671 }
3672
3673 // uv intra modes
3674 for (i = 0; i < 10; i++) {
3675 uint8_t *pp = p->uv_mode[i];
3676 unsigned *c = s->counts.uv_mode[i], sum, s2;
3677
3678 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3679 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3680 sum -= c[TM_VP8_PRED];
3681 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3682 sum -= c[VERT_PRED];
3683 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3684 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3685 sum -= s2;
3686 adapt_prob(&pp[3], s2, sum, 20, 128);
3687 s2 -= c[HOR_PRED];
3688 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3689 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3690 sum -= c[DIAG_DOWN_LEFT_PRED];
3691 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3692 sum -= c[VERT_LEFT_PRED];
3693 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3694 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3695 }
3696}
3697
3698static void free_buffers(VP9Context *s)
3699{
3700 av_freep(&s->intra_pred_data[0]);
3701 av_freep(&s->b_base);
3702 av_freep(&s->block_base);
3703}
3704
3705static av_cold int vp9_decode_free(AVCodecContext *ctx)
3706{
3707 VP9Context *s = ctx->priv_data;
3708 int i;
3709
3710 for (i = 0; i < 2; i++) {
3711 if (s->frames[i].tf.f->data[0])
3712 vp9_unref_frame(ctx, &s->frames[i]);
3713 av_frame_free(&s->frames[i].tf.f);
3714 }
3715 for (i = 0; i < 8; i++) {
3716 if (s->refs[i].f->data[0])
3717 ff_thread_release_buffer(ctx, &s->refs[i]);
3718 av_frame_free(&s->refs[i].f);
3719 if (s->next_refs[i].f->data[0])
3720 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3721 av_frame_free(&s->next_refs[i].f);
3722 }
3723 free_buffers(s);
3724 av_freep(&s->c_b);
3725 s->c_b_size = 0;
3726
3727 return 0;
3728}
3729
3730
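// Top-level frame decoding: parse the uncompressed header first; a return
// value of 0 means "show existing frame", i.e. a previously decoded
// reference is output directly without any coefficient decoding.
// Otherwise decode the tile data (in two passes when frame threading
// requires it), loop-filter one superblock row at a time, adapt the
// entropy context and rotate the reference-frame slots.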
3731static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3732 int *got_frame, AVPacket *pkt)
3733{
3734 const uint8_t *data = pkt->data;
3735 int size = pkt->size;
3736 VP9Context *s = ctx->priv_data;
3737 int res, tile_row, tile_col, i, ref, row, col;
3738 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3739 AVFrame *f;
3740
3741 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3742 return res;
3743 } else if (res == 0) {
3744 if (!s->refs[ref].f->data[0]) {
3745 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3746 return AVERROR_INVALIDDATA;
3747 }
3748 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3749 return res;
3750 *got_frame = 1;
3751 return 0;
3752 }
3753 data += res;
3754 size -= res;
3755
3756 if (s->frames[LAST_FRAME].tf.f->data[0])
3757 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3758 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3759 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3760 return res;
3761 if (s->frames[CUR_FRAME].tf.f->data[0])
3762 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3763 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3764 return res;
3765 f = s->frames[CUR_FRAME].tf.f;
3766 f->key_frame = s->keyframe;
3767 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3768 ls_y = f->linesize[0];
3769 ls_uv = f->linesize[1];
3770
3771 // ref frame setup
3772 for (i = 0; i < 8; i++) {
3773 if (s->next_refs[i].f->data[0])
3774 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3775 if (s->refreshrefmask & (1 << i)) {
3776 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3777 } else {
3778 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3779 }
3780 if (res < 0)
3781 return res;
3782 }
3783
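// export the color properties signalled in the frame header; VP9
// colorspace codes 1-4 map to the corresponding AVColorSpace values,
// while 0 (unknown) and reserved/sRGB codes leave ctx->colorspace as-is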
3784 if (s->fullrange)
3785 ctx->color_range = AVCOL_RANGE_JPEG;
3786 else
3787 ctx->color_range = AVCOL_RANGE_MPEG;
3788
3789 switch (s->colorspace) {
3790 case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
3791 case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
3792 case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
3793 case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
3794 }
3795
3796 // main tile decode loop
3797 memset(s->above_partition_ctx, 0, s->cols);
3798 memset(s->above_skip_ctx, 0, s->cols);
3799 if (s->keyframe || s->intraonly) {
3800 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3801 } else {
3802 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3803 }
3804 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3805 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3806 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3807 memset(s->above_segpred_ctx, 0, s->cols);
3808 s->pass = s->uses_2pass =
3809 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3810 if ((res = update_block_buffers(ctx)) < 0) {
3811 av_log(ctx, AV_LOG_ERROR,
3812 "Failed to allocate block buffers\n");
3813 return res;
3814 }
3815 if (s->refreshctx && s->parallelmode) {
3816 int j, k, l, m;
3817
3818 for (i = 0; i < 4; i++) {
3819 for (j = 0; j < 2; j++)
3820 for (k = 0; k < 2; k++)
3821 for (l = 0; l < 6; l++)
3822 for (m = 0; m < 6; m++)
3823 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3824 s->prob.coef[i][j][k][l][m], 3);
3825 if (s->txfmmode == i)
3826 break;
3827 }
3828 s->prob_ctx[s->framectxid].p = s->prob.p;
3829 ff_thread_finish_setup(ctx);
3830 } else if (!s->refreshctx) {
3831 ff_thread_finish_setup(ctx);
3832 }
3833
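// With frame threading (and a bitstream that is not marked as parallel),
// decoding runs in two passes: pass 1 only parses the bitstream into the
// intermediate block buffers and adapts the probabilities, so the next
// frame thread can start early; pass 2 then does the actual
// reconstruction and loop filtering. Otherwise a single pass does both.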
3834 do {
3835 yoff = uvoff = 0;
3836 s->b = s->b_base;
3837 s->block = s->block_base;
3838 s->uvblock[0] = s->uvblock_base[0];
3839 s->uvblock[1] = s->uvblock_base[1];
3840 s->eob = s->eob_base;
3841 s->uveob[0] = s->uveob_base[0];
3842 s->uveob[1] = s->uveob_base[1];
3843
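// every tile except the very last one is prefixed with its size as a
// 32-bit big-endian value, the last tile using the remaining payload;
// each tile column gets its own range decoder (whose first bool, a
// marker, must decode to zero) so columns can be parsed independently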
3844 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3845 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3846 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3847 if (s->pass != 2) {
3848 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3849 unsigned tile_size;
3850
3851 if (tile_col == s->tiling.tile_cols - 1 &&
3852 tile_row == s->tiling.tile_rows - 1) {
3853 tile_size = size;
3854 } else {
3855 tile_size = AV_RB32(data);
3856 data += 4;
3857 size -= 4;
3858 }
3859 if (tile_size > size) {
3860 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3861 return AVERROR_INVALIDDATA;
3862 }
3863 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3864 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3865 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3866 return AVERROR_INVALIDDATA;
3867 }
3868 data += tile_size;
3869 size -= tile_size;
3870 }
3871 }
3872
3873 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3874 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3875 struct VP9Filter *lflvl_ptr = s->lflvl;
3876 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3877
3878 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3879 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3880 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3881
3882 if (s->pass != 2) {
3883 memset(s->left_partition_ctx, 0, 8);
3884 memset(s->left_skip_ctx, 0, 8);
3885 if (s->keyframe || s->intraonly) {
3886 memset(s->left_mode_ctx, DC_PRED, 16);
3887 } else {
3888 memset(s->left_mode_ctx, NEARESTMV, 8);
3889 }
3890 memset(s->left_y_nnz_ctx, 0, 16);
3891 memset(s->left_uv_nnz_ctx, 0, 16);
3892 memset(s->left_segpred_ctx, 0, 8);
3893
3894 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3895 }
3896
3897 for (col = s->tiling.tile_col_start;
3898 col < s->tiling.tile_col_end;
3899 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3900 // FIXME integrate with lf code (i.e. zero after each
3901 // use, similar to invtxfm coefficients, or similar)
3902 if (s->pass != 1) {
3903 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3904 }
3905
3906 if (s->pass == 2) {
3907 decode_sb_mem(ctx, row, col, lflvl_ptr,
3908 yoff2, uvoff2, BL_64X64);
3909 } else {
3910 decode_sb(ctx, row, col, lflvl_ptr,
3911 yoff2, uvoff2, BL_64X64);
3912 }
3913 }
3914 if (s->pass != 2) {
3915 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3916 }
3917 }
3918
3919 if (s->pass == 1) {
3920 continue;
3921 }
3922
3923 // backup pre-loopfilter reconstruction data for intra
3924 // prediction of next row of sb64s
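// (only the bottom pixel row of each plane is saved, and it must be
// copied before the loop filter below overwrites it)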
3925 if (row + 8 < s->rows) {
3926 memcpy(s->intra_pred_data[0],
3927 f->data[0] + yoff + 63 * ls_y,
3928 8 * s->cols);
3929 memcpy(s->intra_pred_data[1],
3930 f->data[1] + uvoff + 31 * ls_uv,
3931 4 * s->cols);
3932 memcpy(s->intra_pred_data[2],
3933 f->data[2] + uvoff + 31 * ls_uv,
3934 4 * s->cols);
3935 }
3936
3937 // loopfilter one row
3938 if (s->filter.level) {
3939 yoff2 = yoff;
3940 uvoff2 = uvoff;
3941 lflvl_ptr = s->lflvl;
3942 for (col = 0; col < s->cols;
3943 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3944 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3945 }
3946 }
3947
3948 // FIXME maybe we can make this more fine-grained by running the
3949 // loopfilter per-block instead of after each sbrow
3950 // In fact that would also make intra pred left preparation easier?
3951 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
3952 }
3953 }
3954
3955 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3956 adapt_probs(s);
3957 ff_thread_finish_setup(ctx);
3958 }
3959 } while (s->pass++ == 1);
3960 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3961
3962 // ref frame setup
3963 for (i = 0; i < 8; i++) {
3964 if (s->refs[i].f->data[0])
3965 ff_thread_release_buffer(ctx, &s->refs[i]);
3966 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
3967 }
3968
3969 if (!s->invisible) {
3970 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3971 return res;
3972 *got_frame = 1;
3973 }
3974
3975 return 0;
3976}
3977
3978static void vp9_decode_flush(AVCodecContext *ctx)
3979{
3980 VP9Context *s = ctx->priv_data;
3981 int i;
3982
3983 for (i = 0; i < 2; i++)
3984 vp9_unref_frame(ctx, &s->frames[i]);
3985 for (i = 0; i < 8; i++)
3986 ff_thread_release_buffer(ctx, &s->refs[i]);
3987}
3988
3989static int init_frames(AVCodecContext *ctx)
3990{
3991 VP9Context *s = ctx->priv_data;
3992 int i;
3993
3994 for (i = 0; i < 2; i++) {
3995 s->frames[i].tf.f = av_frame_alloc();
3996 if (!s->frames[i].tf.f) {
3997 vp9_decode_free(ctx);
3998 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3999 return AVERROR(ENOMEM);
4000 }
4001 }
4002 for (i = 0; i < 8; i++) {
4003 s->refs[i].f = av_frame_alloc();
4004 s->next_refs[i].f = av_frame_alloc();
4005 if (!s->refs[i].f || !s->next_refs[i].f) {
4006 vp9_decode_free(ctx);
4007 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4008 return AVERROR(ENOMEM);
4009 }
4010 }
4011
4012 return 0;
4013}
4014
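// only 8-bit 4:2:0 (VP9 profile 0) output is supported by this decoder,
// so the pixel format can be fixed once at init time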
4015static av_cold int vp9_decode_init(AVCodecContext *ctx)
4016{
4017 VP9Context *s = ctx->priv_data;
4018
4019 ctx->internal->allocate_progress = 1;
4020 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4021 ff_vp9dsp_init(&s->dsp);
4022 ff_videodsp_init(&s->vdsp, 8);
4023 s->filter.sharpness = -1;
4024
4025 return init_frames(ctx);
4026}
4027
4028static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4029{
4030 return init_frames(avctx);
4031}
4032
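// Frame threading: copy into the destination thread everything the next
// frame may depend on: both internal frame buffers, the eight reference
// slots, the saved probability contexts, loop-filter deltas and
// segmentation feature data, plus the header fields that persist across
// frames.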
4033static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4034{
4035 int i, res;
4036 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4037
4038 // detect size changes in other threads
4039 if (s->intra_pred_data[0] &&
4040 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4041 free_buffers(s);
4042 }
4043
4044 for (i = 0; i < 2; i++) {
4045 if (s->frames[i].tf.f->data[0])
4046 vp9_unref_frame(dst, &s->frames[i]);
4047 if (ssrc->frames[i].tf.f->data[0]) {
4048 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4049 return res;
4050 }
4051 }
4052 for (i = 0; i < 8; i++) {
4053 if (s->refs[i].f->data[0])
4054 ff_thread_release_buffer(dst, &s->refs[i]);
4055 if (ssrc->next_refs[i].f->data[0]) {
4056 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4057 return res;
4058 }
4059 }
4060
4061 s->invisible = ssrc->invisible;
4062 s->keyframe = ssrc->keyframe;
4063 s->uses_2pass = ssrc->uses_2pass;
4064 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4065 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4066 if (ssrc->segmentation.enabled) {
4067 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4068 sizeof(s->segmentation.feat));
4069 }
4070
4071 return 0;
4072}
4073
4074AVCodec ff_vp9_decoder = {
4075 .name = "vp9",
4076 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4077 .type = AVMEDIA_TYPE_VIDEO,
4078 .id = AV_CODEC_ID_VP9,
4079 .priv_data_size = sizeof(VP9Context),
4080 .init = vp9_decode_init,
4081 .close = vp9_decode_free,
4082 .decode = vp9_decode_frame,
4083 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4084 .flush = vp9_decode_flush,
4085 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4086 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4087};