/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dsp.h"
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342

enum CompPredMode {
    PRED_SINGLEREF,
    PRED_COMPREF,
    PRED_SWITCHABLE,
};

enum BlockLevel {
    BL_64X64,
    BL_32X32,
    BL_16X16,
    BL_8X8,
};

enum BlockSize {
    BS_64x64,
    BS_64x32,
    BS_32x64,
    BS_32x32,
    BS_32x16,
    BS_16x32,
    BS_16x16,
    BS_16x8,
    BS_8x16,
    BS_8x8,
    BS_8x4,
    BS_4x8,
    BS_4x4,
    N_BS_SIZES,
};

struct VP9mvrefPair {
    VP56mv mv[2];
    int8_t ref[2];
};

typedef struct VP9Frame {
    ThreadFrame tf;
    AVBufferRef *extradata;
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv;
} VP9Frame;

struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};

typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;

typedef struct VP9Context {
    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block *b_base, *b;
    int pass, uses_2pass, last_uses_2pass;
    int row, row7, col, col7;
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;

    // bitstream header
    uint8_t profile;
    uint8_t keyframe, last_keyframe;
    uint8_t invisible;
    uint8_t use_last_frame_mvs;
    uint8_t errorres;
    uint8_t colorspace;
    uint8_t fullrange;
    uint8_t intraonly;
    uint8_t resetctx;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t fixcompref;
    uint8_t refreshctx;
    uint8_t parallelmode;
    uint8_t framectxid;
    uint8_t refidx[3];
    uint8_t signbias[3];
    uint8_t varcompref[2];
    ThreadFrame refs[8], next_refs[8];
#define CUR_FRAME 0
#define LAST_FRAME 1
    VP9Frame frames[2];

    struct {
        uint8_t level;
        int8_t sharpness;
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter;
    struct {
        uint8_t enabled;
        int8_t mode[2];
        int8_t ref[4];
    } lf_delta;
    uint8_t yac_qi;
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t lossless;
    struct {
        uint8_t enabled;
        uint8_t temporal;
        uint8_t absolute_vals;
        uint8_t update_map;
        struct {
            uint8_t q_enabled;
            uint8_t lf_enabled;
            uint8_t ref_enabled;
            uint8_t skip_enabled;
            uint8_t ref_val;
            int16_t q_val;
            int8_t lf_val;
            int16_t qmul[2][2];
            uint8_t lflvl[4][2];
        } feat[8];
    } segmentation;
    struct {
        unsigned log2_tile_cols, log2_tile_rows;
        unsigned tile_cols, tile_rows;
        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    } tiling;
    unsigned sb_cols, sb_rows, rows, cols;
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
        uint8_t seg[7];
        uint8_t segpred[3];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71 * 80];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
} VP9Context;

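/* Per-BlockSize {width, height} of a block, in 4x4-block units (first index
 * 0) or 8x8-block units (first index 1); for the sub-8x8 sizes the 8x8-unit
 * entries are clamped to a minimum of 1. */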
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};

static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
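    // one segmentation-map byte and one VP9mvrefPair per 8x8 block;
    // a 64x64 superblock covers 8 * 8 = 64 such blocks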
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    // retain segmentation map if it doesn't update
    if (s->segmentation.enabled && !s->segmentation.update_map &&
        !s->intraonly && !s->keyframe && !s->errorres) {
        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
    }

    return 0;
}

static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
}

static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);
        return AVERROR(ENOMEM);
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;

    return 0;
}

static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    av_assert0(w > 0 && h > 0);

    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
        return 0;

    ctx->width = w;
    ctx->height = h;
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;

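    // the "above" context arrays and per-column intra edge/loopfilter data
    // are carved out of a single slab: per 64-pixel column, 64 + 32 + 32
    // bytes of intra edge pixels, 16 + 16 mode/nnz bytes and ten 8-byte
    // context arrays (= 240 bytes), plus 16 above-MV pairs and one VP9Filter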
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    return 0;
}

static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    if (s->uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

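        // (64 * 64 + 128) * 3 bytes per superblock: 12288 bytes of int16_t
        // coefficients (a 64x64 luma plane plus two 32x32 chroma planes) and
        // 384 end-of-block bytes (256 luma + 2 * 64 chroma 4x4 blocks)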
        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
        s->uveob_base[0] = s->eob_base + 256 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
        s->uveob_base[0] = s->eob_base + 256;
        s->uveob_base[1] = s->uveob_base[0] + 64;
    }
    s->block_alloc_using_2pass = s->uses_2pass;

    return 0;
}

// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}

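// inverse of the recentering used by forward probability updates: an even
// code v unfolds to m + v/2, an odd code to m - (v+1)/2, and codes beyond
// the foldable range (v > 2*m) pass through unchanged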
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

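    /* d is now in [0, 254]: the two 4-bit classes cover [0, 32), the 5-bit
     * class [32, 64) and the escape the rest; inv_map_table[] maps it to the
     * absolute delta, which inv_recenter_nonneg() folds back around the old
     * probability p. */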
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}

static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
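    // show_existing_frame: the packet merely names a reference slot to
    // display again; no new frame data follows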
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_uses_2pass = s->uses_2pass;
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }
    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
            }
        }
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_ERROR,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            return AVERROR_INVALIDDATA;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
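    // pick the legal log2_tile_cols range: roughly, each tile column may
    // span at most 64 superblocks, and column splitting stops once tiles
    // would drop below 4 superblocks in width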
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
        s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // fw update)?
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
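        // instead, the raw 7-bit value is scaled to an odd 8-bit probability
        // ((v << 1) | 1), which conveniently can never be zero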
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}

static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
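    // candidate neighbour offsets per block size, in 8x8-block units
    // relative to (col, row), scanned in the order listed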
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;

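    // when idx is nonzero we want the *second* distinct candidate: the
    // first match is parked in mem and only a later, different MV returned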
#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }

#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            clamp_mv(&tmp, &mv, s); \
            m = AV_RN32A(&tmp); \
            if (!idx) { \
                AV_WN32A(pmv, m); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                AV_WN32A(pmv, m); \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }

    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        if (!s->last_uses_2pass)
            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }

#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        // no need to await_progress, because we already did that above
        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
#undef INVALID_MV
#undef RETURN_MV
#undef RETURN_SCALE_MV
}

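/* An MV component is decoded as sign + magnitude, in 1/8-pel units: a class
 * picks the magnitude range, then come the integer bits, a 2-bit 1/4-pel
 * fraction and (when enabled) the 1/8-pel bit, so the magnitude is roughly
 * laid out as [integer bits | fp << 1 | hp], offset by 8 << class. */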
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 8 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}

static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *b = s->b;

    if (mode == ZEROMV) {
        AV_ZERO64(mv);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
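        // high precision only applies when both predicted components are
        // within +/-64 eighth-pels; otherwise clear the 1/8-pel bit by
        // rounding toward zero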
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}

static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}

static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
    int vref, filter_id;

    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        if (!s->errorres) {
            int pred = 8, x;
            uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;

            if (!s->last_uses_2pass)
                ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
            for (y = 0; y < h4; y++)
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if (s->segmentation.enabled &&
        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  w4, h4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }

    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                    vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                        vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
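        // mode-context LUT indexed by [above y mode][left y mode]; entries
        // 0-9 are the intra modes, 10-13 the inter modes (NEARESTMV through
        // NEWMV), which is also why inter-mode counts below subtract 10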
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };

        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->varcompref[1] &&
                                         refa != s->varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->varcompref[1] &&
                                         refl != s->varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }

        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = vp9_filter_lut[filter_id];
        } else {
            b->filter = s->filtermode;
        }

        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
1871 b->mode[3] = b->mode[2];
1872 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1873 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1874 }
1875 } else {
1876 b->mode[2] = b->mode[0];
1877 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1878 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1879 b->mode[3] = b->mode[1];
1880 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1881 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1882 }
1883 } else {
1884 fill_mv(s, b->mv[0], b->mode[0], -1);
1885 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1886 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1887 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1888 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1889 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1890 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1891 }
1892
1893 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1894 }
1895
1896#if HAVE_FAST_64BIT
1897#define SPLAT_CTX(var, val, n) \
1898 switch (n) { \
1899 case 1: var = val; break; \
1900 case 2: AV_WN16A(&var, val * 0x0101); break; \
1901 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1902 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1903 case 16: { \
1904 uint64_t v64 = val * 0x0101010101010101ULL; \
1905 AV_WN64A( &var, v64); \
1906 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1907 break; \
1908 } \
1909 }
1910#else
1911#define SPLAT_CTX(var, val, n) \
1912 switch (n) { \
1913 case 1: var = val; break; \
1914 case 2: AV_WN16A(&var, val * 0x0101); break; \
1915 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1916 case 8: { \
1917 uint32_t v32 = val * 0x01010101; \
1918 AV_WN32A( &var, v32); \
1919 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1920 break; \
1921 } \
1922 case 16: { \
1923 uint32_t v32 = val * 0x01010101; \
1924 AV_WN32A( &var, v32); \
1925 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1926 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1927 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1928 break; \
1929 } \
1930 }
1931#endif
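// A worked example of the byte-replication trick SPLAT_CTX relies on:
// multiplying an 8-bit value by 0x01010101 copies it into every byte of
// the word, e.g. val = 0x2A gives 0x2A * 0x01010101 = 0x2A2A2A2A, so a
// single aligned store fills four context bytes at once (the 64-bit
// variant fills eight) instead of an n-iteration byte loop.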
1932
1933 switch (bwh_tab[1][b->bs][0]) {
1934#define SET_CTXS(dir, off, n) \
1935 do { \
1936 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1937 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1938 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1939 if (!s->keyframe && !s->intraonly) { \
1940 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1941 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1942 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1943 if (!b->intra) { \
1944 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1945 if (s->filtermode == FILTER_SWITCHABLE) { \
1946 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1947 } \
1948 } \
1949 } \
1950 } while (0)
1951 case 1: SET_CTXS(above, col, 1); break;
1952 case 2: SET_CTXS(above, col, 2); break;
1953 case 4: SET_CTXS(above, col, 4); break;
1954 case 8: SET_CTXS(above, col, 8); break;
1955 }
1956 switch (bwh_tab[1][b->bs][1]) {
1957 case 1: SET_CTXS(left, row7, 1); break;
1958 case 2: SET_CTXS(left, row7, 2); break;
1959 case 4: SET_CTXS(left, row7, 4); break;
1960 case 8: SET_CTXS(left, row7, 8); break;
1961 }
1962#undef SPLAT_CTX
1963#undef SET_CTXS
1964
1965 if (!s->keyframe && !s->intraonly) {
1966 if (b->bs > BS_8x8) {
1967 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1968
1969 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1970 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1971 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1972 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1973 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1974 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1975 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1976 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1977 } else {
1978 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1979
1980 for (n = 0; n < w4 * 2; n++) {
1981 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1982 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1983 }
1984 for (n = 0; n < h4 * 2; n++) {
1985 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1986 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1987 }
1988 }
1989 }
1990
1991 // FIXME kinda ugly
1992 for (y = 0; y < h4; y++) {
1993 int x, o = (row + y) * s->sb_cols * 8 + col;
1994 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1995
1996 if (b->intra) {
1997 for (x = 0; x < w4; x++) {
1998 mv[x].ref[0] =
1999 mv[x].ref[1] = -1;
2000 }
2001 } else if (b->comp) {
2002 for (x = 0; x < w4; x++) {
2003 mv[x].ref[0] = b->ref[0];
2004 mv[x].ref[1] = b->ref[1];
2005 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2006 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2007 }
2008 } else {
2009 for (x = 0; x < w4; x++) {
2010 mv[x].ref[0] = b->ref[0];
2011 mv[x].ref[1] = -1;
2012 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2013 }
2014 }
2015 }
2016}
2017
2018// FIXME merge cnt/eob arguments?
2019static av_always_inline int
2020decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2021 int is_tx32x32, unsigned (*cnt)[6][3],
2022 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2023 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2024 const int16_t *band_counts, const int16_t *qmul)
2025{
2026 int i = 0, band = 0, band_left = band_counts[band];
2027 uint8_t *tp = p[0][nnz];
2028 uint8_t cache[1024];
2029
2030 do {
2031 int val, rc;
2032
2033 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2034 eob[band][nnz][val]++;
2035 if (!val)
2036 break;
2037
2038 skip_eob:
2039 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2040 cnt[band][nnz][0]++;
2041 if (!--band_left)
2042 band_left = band_counts[++band];
2043 cache[scan[i]] = 0;
2044 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2045 tp = p[band][nnz];
2046 if (++i == n_coeffs)
2047 break; // invalid input; blocks should end with an EOB
2048 goto skip_eob;
2049 }
2050
2051 rc = scan[i];
2052 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2053 cnt[band][nnz][1]++;
2054 val = 1;
2055 cache[rc] = 1;
2056 } else {
2057 // fill in p[3-10] (model fill) - only once per frame for each pos
2058 if (!tp[3])
2059 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2060
2061 cnt[band][nnz][2]++;
2062 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2063 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2064 cache[rc] = val = 2;
2065 } else {
2066 val = 3 + vp56_rac_get_prob(c, tp[5]);
2067 cache[rc] = 3;
2068 }
2069 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2070 cache[rc] = 4;
2071 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2072 val = 5 + vp56_rac_get_prob(c, 159);
2073 } else {
2074 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2075 val += vp56_rac_get_prob(c, 145);
2076 }
2077 } else { // cat 3-6
2078 cache[rc] = 5;
2079 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2080 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2081 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2082 val += (vp56_rac_get_prob(c, 148) << 1);
2083 val += vp56_rac_get_prob(c, 140);
2084 } else {
2085 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2086 val += (vp56_rac_get_prob(c, 155) << 2);
2087 val += (vp56_rac_get_prob(c, 140) << 1);
2088 val += vp56_rac_get_prob(c, 135);
2089 }
2090 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2091 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2092 val += (vp56_rac_get_prob(c, 157) << 3);
2093 val += (vp56_rac_get_prob(c, 141) << 2);
2094 val += (vp56_rac_get_prob(c, 134) << 1);
2095 val += vp56_rac_get_prob(c, 130);
2096 } else {
2097 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2098 val += (vp56_rac_get_prob(c, 254) << 12);
2099 val += (vp56_rac_get_prob(c, 254) << 11);
2100 val += (vp56_rac_get_prob(c, 252) << 10);
2101 val += (vp56_rac_get_prob(c, 249) << 9);
2102 val += (vp56_rac_get_prob(c, 243) << 8);
2103 val += (vp56_rac_get_prob(c, 230) << 7);
2104 val += (vp56_rac_get_prob(c, 196) << 6);
2105 val += (vp56_rac_get_prob(c, 177) << 5);
2106 val += (vp56_rac_get_prob(c, 153) << 4);
2107 val += (vp56_rac_get_prob(c, 140) << 3);
2108 val += (vp56_rac_get_prob(c, 133) << 2);
2109 val += (vp56_rac_get_prob(c, 130) << 1);
2110 val += vp56_rac_get_prob(c, 129);
2111 }
2112 }
2113 }
2114 if (!--band_left)
2115 band_left = band_counts[++band];
2116 if (is_tx32x32)
2117 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2118 else
2119 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2120 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2121 tp = p[band][nnz];
2122 } while (++i < n_coeffs);
2123
2124 return i;
2125}
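// For orientation, the token ladder decoded above yields these absolute
// coefficient values (before the sign bit and the qmul dequantization):
// zero: 0; one: 1; two..four: 2..4; cat1: 5..6 (1 extra bit);
// cat2: 7..10 (2 bits); cat3: 11..18 (3 bits); cat4: 19..34 (4 bits);
// cat5: 35..66 (5 bits); cat6: 67..16450 (14 bits). Each extra bit is
// itself range-coded with the fixed probabilities given inline, and the
// next coefficient's nnz context is the rounded average of the two
// already-decoded neighbours: (1 + cache[nb[0]] + cache[nb[1]]) >> 1.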
2126
2127static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2128 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2129 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2130 const int16_t (*nb)[2], const int16_t *band_counts,
2131 const int16_t *qmul)
2132{
2133 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2134 nnz, scan, nb, band_counts, qmul);
2135}
2136
2137static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2138 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2139 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2140 const int16_t (*nb)[2], const int16_t *band_counts,
2141 const int16_t *qmul)
2142{
2143 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2144 nnz, scan, nb, band_counts, qmul);
2145}
2146
2147static void decode_coeffs(AVCodecContext *ctx)
2148{
2149 VP9Context *s = ctx->priv_data;
2150 VP9Block *b = s->b;
2151 int row = s->row, col = s->col;
2152 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2153 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2154 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2155 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2156 int end_x = FFMIN(2 * (s->cols - col), w4);
2157 int end_y = FFMIN(2 * (s->rows - row), h4);
2158 int n, pl, x, y, res;
2159 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2160 int tx = 4 * s->lossless + b->tx;
2161 const int16_t * const *yscans = vp9_scans[tx];
2162 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2163 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2164 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2165 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2166 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2167 static const int16_t band_counts[4][8] = {
2168 { 1, 2, 3, 4, 3, 16 - 13 },
2169 { 1, 2, 3, 4, 11, 64 - 21 },
2170 { 1, 2, 3, 4, 11, 256 - 21 },
2171 { 1, 2, 3, 4, 11, 1024 - 21 },
2172 };
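// each band_counts row splits one tx size's coefficients into the six
// probability bands used by the entropy coder; the entries sum to the
// coefficient count, e.g. TX_4X4: 1+2+3+4+3+(16-13) = 16 and
// TX_32X32: 1+2+3+4+11+(1024-21) = 1024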
2173 const int16_t *y_band_counts = band_counts[b->tx];
2174 const int16_t *uv_band_counts = band_counts[b->uvtx];
2175
2176#define MERGE(la, end, step, rd) \
2177 for (n = 0; n < end; n += step) \
2178 la[n] = !!rd(&la[n])
2179#define MERGE_CTX(step, rd) \
2180 do { \
2181 MERGE(l, end_y, step, rd); \
2182 MERGE(a, end_x, step, rd); \
2183 } while (0)
2184
2185#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2186 for (n = 0, y = 0; y < end_y; y += step) { \
2187 for (x = 0; x < end_x; x += step, n += step * step) { \
2188 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2189 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2190 c, e, p, a[x] + l[y], yscans[txtp], \
2191 ynbs[txtp], y_band_counts, qmul[0]); \
2192 a[x] = l[y] = !!res; \
2193 if (step >= 4) { \
2194 AV_WN16A(&s->eob[n], res); \
2195 } else { \
2196 s->eob[n] = res; \
2197 } \
2198 } \
2199 }
2200
2201#define SPLAT(la, end, step, cond) \
2202 if (step == 2) { \
2203 for (n = 1; n < end; n += step) \
2204 la[n] = la[n - 1]; \
2205 } else if (step == 4) { \
2206 if (cond) { \
2207 for (n = 0; n < end; n += step) \
2208 AV_WN32A(&la[n], la[n] * 0x01010101); \
2209 } else { \
2210 for (n = 0; n < end; n += step) \
2211 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2212 } \
2213 } else /* step == 8 */ { \
2214 if (cond) { \
2215 if (HAVE_FAST_64BIT) { \
2216 for (n = 0; n < end; n += step) \
2217 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2218 } else { \
2219 for (n = 0; n < end; n += step) { \
2220 uint32_t v32 = la[n] * 0x01010101; \
2221 AV_WN32A(&la[n], v32); \
2222 AV_WN32A(&la[n + 4], v32); \
2223 } \
2224 } \
2225 } else { \
2226 for (n = 0; n < end; n += step) \
2227 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2228 } \
2229 }
2230#define SPLAT_CTX(step) \
2231 do { \
2232 SPLAT(a, end_x, step, end_x == w4); \
2233 SPLAT(l, end_y, step, end_y == h4); \
2234 } while (0)
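// MERGE/SPLAT keep the per-4x4 nnz context arrays consistent when one
// transform spans several 4x4 units: e.g. for TX_8X8, MERGE_CTX(2, ...)
// sets a[n] = !!AV_RN16A(&a[n]) (did either covered 4x4 column have
// coefficients?) before decoding, and SPLAT_CTX(2) copies the resulting
// flag back over both entries afterwards.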
2235
2236 /* y tokens */
2237 switch (b->tx) {
2238 case TX_4X4:
2239 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2240 break;
2241 case TX_8X8:
2242 MERGE_CTX(2, AV_RN16A);
2243 DECODE_Y_COEF_LOOP(2, 0,);
2244 SPLAT_CTX(2);
2245 break;
2246 case TX_16X16:
2247 MERGE_CTX(4, AV_RN32A);
2248 DECODE_Y_COEF_LOOP(4, 0,);
2249 SPLAT_CTX(4);
2250 break;
2251 case TX_32X32:
2252 MERGE_CTX(8, AV_RN64A);
2253 DECODE_Y_COEF_LOOP(8, 0, 32);
2254 SPLAT_CTX(8);
2255 break;
2256 }
2257
2258#define DECODE_UV_COEF_LOOP(step) \
2259 for (n = 0, y = 0; y < end_y; y += step) { \
2260 for (x = 0; x < end_x; x += step, n += step * step) { \
2261 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2262 16 * step * step, c, e, p, a[x] + l[y], \
2263 uvscan, uvnb, uv_band_counts, qmul[1]); \
2264 a[x] = l[y] = !!res; \
2265 if (step >= 4) { \
2266 AV_WN16A(&s->uveob[pl][n], res); \
2267 } else { \
2268 s->uveob[pl][n] = res; \
2269 } \
2270 } \
2271 }
2272
2273 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2274 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2275 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2276 w4 >>= 1;
2277 h4 >>= 1;
2278 end_x >>= 1;
2279 end_y >>= 1;
2280 for (pl = 0; pl < 2; pl++) {
2281 a = &s->above_uv_nnz_ctx[pl][col];
2282 l = &s->left_uv_nnz_ctx[pl][row & 7];
2283 switch (b->uvtx) {
2284 case TX_4X4:
2285 DECODE_UV_COEF_LOOP(1);
2286 break;
2287 case TX_8X8:
2288 MERGE_CTX(2, AV_RN16A);
2289 DECODE_UV_COEF_LOOP(2);
2290 SPLAT_CTX(2);
2291 break;
2292 case TX_16X16:
2293 MERGE_CTX(4, AV_RN32A);
2294 DECODE_UV_COEF_LOOP(4);
2295 SPLAT_CTX(4);
2296 break;
2297 case TX_32X32:
2298 MERGE_CTX(8, AV_RN64A);
2299 // a 64x64 (max) uv block can only ever contain one tx32x32 block,
2300 // so there is no need to loop
2301 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2302 1024, c, e, p, a[0] + l[0],
2303 uvscan, uvnb, uv_band_counts, qmul[1]);
2304 a[0] = l[0] = !!res;
2305 AV_WN16A(&s->uveob[pl][0], res);
2306 SPLAT_CTX(8);
2307 break;
2308 }
2309 }
2310}
2311
2312static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2313 uint8_t *dst_edge, ptrdiff_t stride_edge,
2314 uint8_t *dst_inner, ptrdiff_t stride_inner,
2315 uint8_t *l, int col, int x, int w,
2316 int row, int y, enum TxfmMode tx,
2317 int p)
2318{
2319 int have_top = row > 0 || y > 0;
2320 int have_left = col > s->tiling.tile_col_start || x > 0;
2321 int have_right = x < w - 1;
2322 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2323 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2324 { DC_127_PRED, VERT_PRED } },
2325 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2326 { HOR_PRED, HOR_PRED } },
2327 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2328 { LEFT_DC_PRED, DC_PRED } },
2329 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2330 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2331 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2332 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2333 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2334 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2335 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2336 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2337 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2338 { DC_127_PRED, VERT_LEFT_PRED } },
2339 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2340 { HOR_UP_PRED, HOR_UP_PRED } },
2341 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2342 { HOR_PRED, TM_VP8_PRED } },
2343 };
2344 static const struct {
2345 uint8_t needs_left:1;
2346 uint8_t needs_top:1;
2347 uint8_t needs_topleft:1;
2348 uint8_t needs_topright:1;
2349 } edges[N_INTRA_PRED_MODES] = {
2350 [VERT_PRED] = { .needs_top = 1 },
2351 [HOR_PRED] = { .needs_left = 1 },
2352 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2353 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2354 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2355 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2356 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2357 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2358 [HOR_UP_PRED] = { .needs_left = 1 },
2359 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2360 [LEFT_DC_PRED] = { .needs_left = 1 },
2361 [TOP_DC_PRED] = { .needs_top = 1 },
2362 [DC_128_PRED] = { 0 },
2363 [DC_127_PRED] = { 0 },
2364 [DC_129_PRED] = { 0 }
2365 };
2366
2367 av_assert2(mode >= 0 && mode < 10);
2368 mode = mode_conv[mode][have_left][have_top];
2369 if (edges[mode].needs_top) {
2370 uint8_t *top, *topleft;
2371 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2372 int n_px_need_tr = 0;
2373
2374 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2375 n_px_need_tr = 4;
2376
2377 // if top of sb64-row, use s->intra_pred_data[] instead of
2378 // dst[-stride] for intra prediction (it contains pre- instead of
2379 // post-loopfilter data)
2380 if (have_top) {
2381 top = !(row & 7) && !y ?
2382 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2383 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2384 if (have_left)
2385 topleft = !(row & 7) && !y ?
2386 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2387 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2388 &dst_inner[-stride_inner];
2389 }
2390
2391 if (have_top &&
2392 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2393 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2394 n_px_need + n_px_need_tr <= n_px_have) {
2395 *a = top;
2396 } else {
2397 if (have_top) {
2398 if (n_px_need <= n_px_have) {
2399 memcpy(*a, top, n_px_need);
2400 } else {
2401 memcpy(*a, top, n_px_have);
2402 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2403 n_px_need - n_px_have);
2404 }
2405 } else {
2406 memset(*a, 127, n_px_need);
2407 }
2408 if (edges[mode].needs_topleft) {
2409 if (have_left && have_top) {
2410 (*a)[-1] = topleft[-1];
2411 } else {
2412 (*a)[-1] = have_top ? 129 : 127;
2413 }
2414 }
2415 if (tx == TX_4X4 && edges[mode].needs_topright) {
2416 if (have_top && have_right &&
2417 n_px_need + n_px_need_tr <= n_px_have) {
2418 memcpy(&(*a)[4], &top[4], 4);
2419 } else {
2420 memset(&(*a)[4], (*a)[3], 4);
2421 }
2422 }
2423 }
2424 }
2425 if (edges[mode].needs_left) {
2426 if (have_left) {
2427 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2428 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2429 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2430
2431 if (n_px_need <= n_px_have) {
2432 for (i = 0; i < n_px_need; i++)
2433 l[n_px_need - 1 - i] = dst[i * stride - 1];
2434 } else {
2435 for (i = 0; i < n_px_have; i++)
2436 l[n_px_need - 1 - i] = dst[i * stride - 1];
2437 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2438 }
2439 } else {
2440 memset(l, 129, 4 << tx);
2441 }
2442 }
2443
2444 return mode;
2445}
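// Example of the fallback behaviour above: a VERT_PRED block on the very
// first row has no top pixels, so mode_conv[] maps it to DC_127_PRED --
// exactly what vertical prediction from a top row memset to 127 would
// produce -- and HOR_PRED without left pixels likewise becomes
// DC_129_PRED, matching the memset(l, 129, ...) default below.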
2446
2447static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2448{
2449 VP9Context *s = ctx->priv_data;
2450 VP9Block *b = s->b;
2451 int row = s->row, col = s->col;
2452 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2453 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2454 int end_x = FFMIN(2 * (s->cols - col), w4);
2455 int end_y = FFMIN(2 * (s->rows - row), h4);
2456 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2457 int uvstep1d = 1 << b->uvtx, p;
2458 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2459 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2460 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2461
2462 for (n = 0, y = 0; y < end_y; y += step1d) {
2463 uint8_t *ptr = dst, *ptr_r = dst_r;
2464 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2465 ptr_r += 4 * step1d, n += step) {
2466 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2467 y * 2 + x : 0];
2468 uint8_t *a = &a_buf[32];
2469 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2470 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2471
2472 mode = check_intra_mode(s, mode, &a, ptr_r,
2473 s->frames[CUR_FRAME].tf.f->linesize[0],
2474 ptr, s->y_stride, l,
2475 col, x, w4, row, y, b->tx, 0);
2476 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2477 if (eob)
2478 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2479 s->block + 16 * n, eob);
2480 }
2481 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2482 dst += 4 * step1d * s->y_stride;
2483 }
2484
2485 // U/V
2486 w4 >>= 1;
2487 end_x >>= 1;
2488 end_y >>= 1;
2489 step = 1 << (b->uvtx * 2);
2490 for (p = 0; p < 2; p++) {
2491 dst = s->dst[1 + p];
2492 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2493 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2494 uint8_t *ptr = dst, *ptr_r = dst_r;
2495 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2496 ptr_r += 4 * uvstep1d, n += step) {
2497 int mode = b->uvmode;
2498 uint8_t *a = &a_buf[16];
2499 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2500
2501 mode = check_intra_mode(s, mode, &a, ptr_r,
2502 s->frames[CUR_FRAME].tf.f->linesize[1],
2503 ptr, s->uv_stride, l,
2504 col, x, w4, row, y, b->uvtx, p + 1);
2505 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2506 if (eob)
2507 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2508 s->uvblock[p] + 16 * n, eob);
2509 }
2510 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2511 dst += 4 * uvstep1d * s->uv_stride;
2512 }
2513 }
2514}
2515
2516static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2517 uint8_t *dst, ptrdiff_t dst_stride,
2518 const uint8_t *ref, ptrdiff_t ref_stride,
2519 ThreadFrame *ref_frame,
2520 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2521 int bw, int bh, int w, int h)
2522{
2523 int mx = mv->x, my = mv->y, th;
2524
2525 y += my >> 3;
2526 x += mx >> 3;
2527 ref += y * ref_stride + x;
2528 mx &= 7;
2529 my &= 7;
2530 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2531 // we use +7 because the last 7 pixels of each sbrow can be changed in
2532 // the longest loopfilter of the next sbrow
2533 th = (y + bh + 4 * !!my + 7) >> 6;
2534 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2535 if (x < !!mx * 3 || y < !!my * 3 ||
2536 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2537 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2538 ref - !!my * 3 * ref_stride - !!mx * 3,
2539 80, ref_stride,
2540 bw + !!mx * 7, bh + !!my * 7,
2541 x - !!mx * 3, y - !!my * 3, w, h);
2542 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2543 ref_stride = 80;
2544 }
2545 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2546}
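// Sketch of the bounds check above: with a fractional mv, the 8-tap
// subpel filter reads 3 pixels before and 4 after each output pixel, so
// the block needs [x - 3, x + bw + 4) x [y - 3, y + bh + 4) from the
// reference. If that window leaves the frame, emulated_edge_mc() first
// copies it into the 80-byte-stride scratch buffer with replicated
// borders. th is the last 64-pixel sbrow the filter can touch (hence the
// >> 6), which is the progress point the frame-threading wait needs.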
2547
2548static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2549 uint8_t *dst_u, uint8_t *dst_v,
2550 ptrdiff_t dst_stride,
2551 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2552 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2553 ThreadFrame *ref_frame,
2554 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2555 int bw, int bh, int w, int h)
2556{
2557 int mx = mv->x, my = mv->y, th;
2558
2559 y += my >> 4;
2560 x += mx >> 4;
2561 ref_u += y * src_stride_u + x;
2562 ref_v += y * src_stride_v + x;
2563 mx &= 15;
2564 my &= 15;
2565 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2566 // we use +7 because the last 7 pixels of each sbrow can be changed in
2567 // the longest loopfilter of the next sbrow
2568 th = (y + bh + 4 * !!my + 7) >> 5;
2569 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2570 if (x < !!mx * 3 || y < !!my * 3 ||
2571 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2572 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2573 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2574 80, src_stride_u,
2575 bw + !!mx * 7, bh + !!my * 7,
2576 x - !!mx * 3, y - !!my * 3, w, h);
2577 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2578 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2579
2580 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2581 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2582 80, src_stride_v,
2583 bw + !!mx * 7, bh + !!my * 7,
2584 x - !!mx * 3, y - !!my * 3, w, h);
2585 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2586 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2587 } else {
2588 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2589 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2590 }
2591}
2592
2593static void inter_recon(AVCodecContext *ctx)
2594{
2595 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2596 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2597 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2598 };
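// bwlog_tab maps a block size to the dsp.mc[] width index, i.e.
// log2(64 / block_width): 64-wide blocks use mc[0], 32-wide mc[1], ...,
// 4-wide mc[4]. Row [1] is the chroma variant: one step narrower because
// of 4:2:0 subsampling, capped at the 4-pixel-wide filter.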
2599 VP9Context *s = ctx->priv_data;
2600 VP9Block *b = s->b;
2601 int row = s->row, col = s->col;
2602 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2603 AVFrame *ref1 = tref1->f, *ref2;
2604 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2605 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2606
2607 if (b->comp) {
2608 tref2 = &s->refs[s->refidx[b->ref[1]]];
2609 ref2 = tref2->f;
2610 w2 = ref2->width;
2611 h2 = ref2->height;
2612 }
2613
2614 // y inter pred
2615 if (b->bs > BS_8x8) {
2616 if (b->bs == BS_8x4) {
2617 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2618 ref1->data[0], ref1->linesize[0], tref1,
2619 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2620 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2621 s->dst[0] + 4 * ls_y, ls_y,
2622 ref1->data[0], ref1->linesize[0], tref1,
2623 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
2624
2625 if (b->comp) {
2626 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2627 ref2->data[0], ref2->linesize[0], tref2,
2628 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2629 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2630 s->dst[0] + 4 * ls_y, ls_y,
2631 ref2->data[0], ref2->linesize[0], tref2,
2632 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2633 }
2634 } else if (b->bs == BS_4x8) {
2635 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2636 ref1->data[0], ref1->linesize[0], tref1,
2637 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2638 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2639 ref1->data[0], ref1->linesize[0], tref1,
2640 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2641
2642 if (b->comp) {
2643 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2644 ref2->data[0], ref2->linesize[0], tref2,
2645 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2646 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2647 ref2->data[0], ref2->linesize[0], tref2,
2648 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2649 }
2650 } else {
2651 av_assert2(b->bs == BS_4x4);
2652
2653 // FIXME if two horizontally adjacent blocks have the same MV,
2654 // do a w8 instead of a w4 call
2655 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2656 ref1->data[0], ref1->linesize[0], tref1,
2657 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2658 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2659 ref1->data[0], ref1->linesize[0], tref1,
2660 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2661 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2662 s->dst[0] + 4 * ls_y, ls_y,
2663 ref1->data[0], ref1->linesize[0], tref1,
2664 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2665 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2666 s->dst[0] + 4 * ls_y + 4, ls_y,
2667 ref1->data[0], ref1->linesize[0], tref1,
2668 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2669
2670 if (b->comp) {
2671 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2672 ref2->data[0], ref2->linesize[0], tref2,
2673 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2674 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2675 ref2->data[0], ref2->linesize[0], tref2,
2676 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2677 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2678 s->dst[0] + 4 * ls_y, ls_y,
2679 ref2->data[0], ref2->linesize[0], tref2,
2680 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2681 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2682 s->dst[0] + 4 * ls_y + 4, ls_y,
2683 ref2->data[0], ref2->linesize[0], tref2,
2684 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
2685 }
2686 }
2687 } else {
2688 int bwl = bwlog_tab[0][b->bs];
2689 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2690
2691 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2692 ref1->data[0], ref1->linesize[0], tref1,
2693 row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);
2694
2695 if (b->comp)
2696 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2697 ref2->data[0], ref2->linesize[0], tref2,
2698 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
2699 }
2700
2701 // uv inter pred
2702 {
2703 int bwl = bwlog_tab[1][b->bs];
2704 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2705 VP56mv mvuv;
2706
2707 w1 = (w1 + 1) >> 1;
2708 h1 = (h1 + 1) >> 1;
2709 if (b->comp) {
2710 w2 = (w2 + 1) >> 1;
2711 h2 = (h2 + 1) >> 1;
2712 }
2713 if (b->bs > BS_8x8) {
2714 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2715 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2716 } else {
2717 mvuv = b->mv[0][0];
2718 }
2719
2720 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2721 s->dst[1], s->dst[2], ls_uv,
2722 ref1->data[1], ref1->linesize[1],
2723 ref1->data[2], ref1->linesize[2], tref1,
2724 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2725
2726 if (b->comp) {
2727 if (b->bs > BS_8x8) {
2728 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2729 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2730 } else {
2731 mvuv = b->mv[0][1];
2732 }
2733 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2734 s->dst[1], s->dst[2], ls_uv,
2735 ref2->data[1], ref2->linesize[1],
2736 ref2->data[2], ref2->linesize[2], tref2,
2737 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2738 }
2739 }
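// for sub-8x8 blocks, the single chroma MV used above is the rounded
// average of the four luma MVs (ROUNDED_DIV by 4), e.g. luma x
// components 3, 4, 4, 5 average to mvuv.x = 4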
2740
2741 if (!b->skip) {
2742 /* mostly copied from intra_recon() */
2743
2744 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2745 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2746 int end_x = FFMIN(2 * (s->cols - col), w4);
2747 int end_y = FFMIN(2 * (s->rows - row), h4);
2748 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2749 int uvstep1d = 1 << b->uvtx, p;
2750 uint8_t *dst = s->dst[0];
2751
2752 // y itxfm add
2753 for (n = 0, y = 0; y < end_y; y += step1d) {
2754 uint8_t *ptr = dst;
2755 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2756 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2757
2758 if (eob)
2759 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2760 s->block + 16 * n, eob);
2761 }
2762 dst += 4 * s->y_stride * step1d;
2763 }
2764
2765 // uv itxfm add
2766 end_x >>= 1;
2767 end_y >>= 1;
2768 step = 1 << (b->uvtx * 2);
2769 for (p = 0; p < 2; p++) {
2770 dst = s->dst[p + 1];
2771 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2772 uint8_t *ptr = dst;
2773 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2774 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2775
2776 if (eob)
2777 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2778 s->uvblock[p] + 16 * n, eob);
2779 }
2780 dst += 4 * uvstep1d * s->uv_stride;
2781 }
2782 }
2783 }
2784}
2785
2786static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2787 int row_and_7, int col_and_7,
2788 int w, int h, int col_end, int row_end,
2789 enum TxfmMode tx, int skip_inter)
2790{
2791 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2792 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2793 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2794 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2795
2796 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2797 // edges. This means that for UV, we work on two subsampled blocks at
2798 // a time, and we only use the topleft block's mode information to set
2799 // things like block strength. Thus, for any block size smaller than
2800 // 16x16, ignore the odd portion of the block.
2801 if (tx == TX_4X4 && is_uv) {
2802 if (h == 1) {
2803 if (row_and_7 & 1)
2804 return;
2805 if (!row_end)
2806 h += 1;
2807 }
2808 if (w == 1) {
2809 if (col_and_7 & 1)
2810 return;
2811 if (!col_end)
2812 w += 1;
2813 }
2814 }
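// the column masks built below are plain bit ranges: t = 1 << col_and_7
// marks the block's first 8px column within the superblock, and
// m_col = (t << w) - t sets the w consecutive bits covering the block,
// e.g. col_and_7 = 2, w = 2 gives t = 0b0100 and m_col = 0b1100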
2815
2816 if (tx == TX_4X4 && !skip_inter) {
2817 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2818 int m_col_odd = (t << (w - 1)) - t;
2819
2820 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2821 if (is_uv) {
2822 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2823
2824 for (y = row_and_7; y < h + row_and_7; y++) {
2825 int col_mask_id = 2 - !(y & 7);
2826
2827 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2828 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2829 // for odd lines, if the odd col is not being filtered,
2830 // skip odd row also:
2831 // .---. <-- a
2832 // | |
2833 // |___| <-- b
2834 // ^ ^
2835 // c d
2836 //
2837 // if a/c are even row/col and b/d are odd, and d is skipped,
2838 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2839 if ((col_end & 1) && (y & 1)) {
2840 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2841 } else {
2842 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2843 }
2844 }
2845 } else {
2846 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2847
2848 for (y = row_and_7; y < h + row_and_7; y++) {
2849 int col_mask_id = 2 - !(y & 3);
2850
2851 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2852 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2853 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2854 lflvl->mask[is_uv][0][y][3] |= m_col;
2855 lflvl->mask[is_uv][1][y][3] |= m_col;
2856 }
2857 }
2858 } else {
2859 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2860
2861 if (!skip_inter) {
2862 int mask_id = (tx == TX_8X8);
2863 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2864 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2865 int m_row = m_col & masks[l2];
2866
2867 // at odd UV col/row tx16/tx32 loopfilter edges, force the
2868 // 8-wide loopfilter to prevent going off the visible edge.
2869 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2870 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2871 int m_row_8 = m_row - m_row_16;
2872
2873 for (y = row_and_7; y < h + row_and_7; y++) {
2874 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2875 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2876 }
2877 } else {
2878 for (y = row_and_7; y < h + row_and_7; y++)
2879 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2880 }
2881
2882 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2883 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2884 lflvl->mask[is_uv][1][y][0] |= m_col;
2885 if (y - row_and_7 == h - 1)
2886 lflvl->mask[is_uv][1][y][1] |= m_col;
2887 } else {
2888 for (y = row_and_7; y < h + row_and_7; y += step1d)
2889 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2890 }
2891 } else if (tx != TX_4X4) {
2892 int mask_id;
2893
2894 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2895 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2896 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2897 for (y = row_and_7; y < h + row_and_7; y++)
2898 lflvl->mask[is_uv][0][y][mask_id] |= t;
2899 } else if (is_uv) {
2900 int t8 = t & 0x01, t4 = t - t8;
2901
2902 for (y = row_and_7; y < h + row_and_7; y++) {
2903 lflvl->mask[is_uv][0][y][2] |= t4;
2904 lflvl->mask[is_uv][0][y][1] |= t8;
2905 }
2906 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2907 } else {
2908 int t8 = t & 0x11, t4 = t - t8;
2909
2910 for (y = row_and_7; y < h + row_and_7; y++) {
2911 lflvl->mask[is_uv][0][y][2] |= t4;
2912 lflvl->mask[is_uv][0][y][1] |= t8;
2913 }
2914 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2915 }
2916 }
2917}
2918
2919static void decode_b(AVCodecContext *ctx, int row, int col,
2920 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2921 enum BlockLevel bl, enum BlockPartition bp)
2922{
2923 VP9Context *s = ctx->priv_data;
2924 VP9Block *b = s->b;
2925 enum BlockSize bs = bl * 3 + bp;
2926 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2927 int emu[2];
2928 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2929
2930 s->row = row;
2931 s->row7 = row & 7;
2932 s->col = col;
2933 s->col7 = col & 7;
2934 s->min_mv.x = -(128 + col * 64);
2935 s->min_mv.y = -(128 + row * 64);
2936 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2937 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2938 if (s->pass < 2) {
2939 b->bs = bs;
2940 b->bl = bl;
2941 b->bp = bp;
2942 decode_mode(ctx);
2943 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2944
2945 if (!b->skip) {
2946 decode_coeffs(ctx);
2947 } else {
2948 int row7 = s->row7;
2949
2950#define SPLAT_ZERO_CTX(v, n) \
2951 switch (n) { \
2952 case 1: v = 0; break; \
2953 case 2: AV_ZERO16(&v); break; \
2954 case 4: AV_ZERO32(&v); break; \
2955 case 8: AV_ZERO64(&v); break; \
2956 case 16: AV_ZERO128(&v); break; \
2957 }
2958#define SPLAT_ZERO_YUV(dir, var, off, n) \
2959 do { \
2960 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2961 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2962 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2963 } while (0)
2964
2965 switch (w4) {
2966 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2967 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2968 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2969 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2970 }
2971 switch (h4) {
2972 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2973 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2974 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2975 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
2976 }
2977 }
2978 if (s->pass == 1) {
2979 s->b++;
2980 s->block += w4 * h4 * 64;
2981 s->uvblock[0] += w4 * h4 * 16;
2982 s->uvblock[1] += w4 * h4 * 16;
2983 s->eob += 4 * w4 * h4;
2984 s->uveob[0] += w4 * h4;
2985 s->uveob[1] += w4 * h4;
2986
2987 return;
2988 }
2989 }
2990
2991 // use the emulated-overhang scratch buffers if the target buffer's
2992 // stride can't hold the block; this allows supporting emu-edge and
2993 // the like even with large block overhangs
2994 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2995 (row + h4) > s->rows;
2996 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2997 (row + h4) > s->rows;
2998 if (emu[0]) {
2999 s->dst[0] = s->tmp_y;
3000 s->y_stride = 64;
3001 } else {
3002 s->dst[0] = f->data[0] + yoff;
3003 s->y_stride = f->linesize[0];
3004 }
3005 if (emu[1]) {
3006 s->dst[1] = s->tmp_uv[0];
3007 s->dst[2] = s->tmp_uv[1];
3008 s->uv_stride = 32;
3009 } else {
3010 s->dst[1] = f->data[1] + uvoff;
3011 s->dst[2] = f->data[2] + uvoff;
3012 s->uv_stride = f->linesize[1];
3013 }
3014 if (b->intra) {
3015 intra_recon(ctx, yoff, uvoff);
3016 } else {
3017 inter_recon(ctx);
3018 }
3019 if (emu[0]) {
3020 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3021
3022 for (n = 0; o < w; n++) {
3023 int bw = 64 >> n;
3024
3025 av_assert2(n <= 4);
3026 if (w & bw) {
3027 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3028 s->tmp_y + o, 64, h, 0, 0);
3029 o += bw;
3030 }
3031 }
3032 }
3033 if (emu[1]) {
3034 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3035
3036 for (n = 1; o < w; n++) {
3037 int bw = 64 >> n;
3038
3039 av_assert2(n <= 4);
3040 if (w & bw) {
3041 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3042 s->tmp_uv[0] + o, 32, h, 0, 0);
3043 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3044 s->tmp_uv[1] + o, 32, h, 0, 0);
3045 o += bw;
3046 }
3047 }
3048 }
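// the writebacks above decompose the block width into power-of-2 copy
// calls: e.g. a 24px-wide luma overhang issues one 16px-wide and one
// 8px-wide call, since mc[n][0][0][0][0] with zero subpel offsets is
// effectively a width-specific copy routine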
3049
3050 // pick filter level and find edges to apply filter to
3051 if (s->filter.level &&
3052 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3053 [b->mode[3] != ZEROMV]) > 0) {
3054 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3055 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3056
3057 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3058 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3059 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3060 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3061 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3062 b->uvtx, skip_inter);
3063
3064 if (!s->filter.lim_lut[lvl]) {
3065 int sharp = s->filter.sharpness;
3066 int limit = lvl;
3067
3068 if (sharp > 0) {
3069 limit >>= (sharp + 3) >> 2;
3070 limit = FFMIN(limit, 9 - sharp);
3071 }
3072 limit = FFMAX(limit, 1);
3073
3074 s->filter.lim_lut[lvl] = limit;
3075 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3076 }
3077 }
3078
3079 if (s->pass == 2) {
3080 s->b++;
3081 s->block += w4 * h4 * 64;
3082 s->uvblock[0] += w4 * h4 * 16;
3083 s->uvblock[1] += w4 * h4 * 16;
3084 s->eob += 4 * w4 * h4;
3085 s->uveob[0] += w4 * h4;
3086 s->uveob[1] += w4 * h4;
3087 }
3088}
3089
3090static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3091 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3092{
3093 VP9Context *s = ctx->priv_data;
3094 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3095 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3096 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3097 s->prob.p.partition[bl][c];
3098 enum BlockPartition bp;
3099 ptrdiff_t hbs = 4 >> bl;
3100 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3101 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3102
3103 if (bl == BL_8X8) {
3104 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3105 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3106 } else if (col + hbs < s->cols) { // FIXME why not <=?
3107 if (row + hbs < s->rows) { // FIXME why not <=?
3108 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3109 switch (bp) {
3110 case PARTITION_NONE:
3111 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3112 break;
3113 case PARTITION_H:
3114 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3115 yoff += hbs * 8 * y_stride;
3116 uvoff += hbs * 4 * uv_stride;
3117 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3118 break;
3119 case PARTITION_V:
3120 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3121 yoff += hbs * 8;
3122 uvoff += hbs * 4;
3123 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3124 break;
3125 case PARTITION_SPLIT:
3126 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3127 decode_sb(ctx, row, col + hbs, lflvl,
3128 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3129 yoff += hbs * 8 * y_stride;
3130 uvoff += hbs * 4 * uv_stride;
3131 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3132 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3133 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3134 break;
3135 default:
3136 av_assert0(0);
3137 }
3138 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3139 bp = PARTITION_SPLIT;
3140 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3141 decode_sb(ctx, row, col + hbs, lflvl,
3142 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3143 } else {
3144 bp = PARTITION_H;
3145 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3146 }
3147 } else if (row + hbs < s->rows) { // FIXME why not <=?
3148 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3149 bp = PARTITION_SPLIT;
3150 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3151 yoff += hbs * 8 * y_stride;
3152 uvoff += hbs * 4 * uv_stride;
3153 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3154 } else {
3155 bp = PARTITION_V;
3156 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3157 }
3158 } else {
3159 bp = PARTITION_SPLIT;
3160 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3161 }
3162 s->counts.partition[bl][c][bp]++;
3163}
3164
3165static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3166 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3167{
3168 VP9Context *s = ctx->priv_data;
3169 VP9Block *b = s->b;
3170 ptrdiff_t hbs = 4 >> bl;
3171 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3172 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3173
3174 if (bl == BL_8X8) {
3175 av_assert2(b->bl == BL_8X8);
3176 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3177 } else if (s->b->bl == bl) {
3178 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3179 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3180 yoff += hbs * 8 * y_stride;
3181 uvoff += hbs * 4 * uv_stride;
3182 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3183 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3184 yoff += hbs * 8;
3185 uvoff += hbs * 4;
3186 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3187 }
3188 } else {
3189 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3190 if (col + hbs < s->cols) { // FIXME why not <=?
3191 if (row + hbs < s->rows) {
3192 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3193 uvoff + 4 * hbs, bl + 1);
3194 yoff += hbs * 8 * y_stride;
3195 uvoff += hbs * 4 * uv_stride;
3196 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3197 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3198 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3199 } else {
3200 yoff += hbs * 8;
3201 uvoff += hbs * 4;
3202 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3203 }
3204 } else if (row + hbs < s->rows) {
3205 yoff += hbs * 8 * y_stride;
3206 uvoff += hbs * 4 * uv_stride;
3207 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3208 }
3209 }
3210}
3211
3212static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3213 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3214{
3215 VP9Context *s = ctx->priv_data;
3216 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3217 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3218 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3219 int y, x, p;
3220
3221 // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
3222 // if you think of them as acting on an 8x8 block max, we can interleave
3223 // each v/h within the single x loop, but that only works if we work on
3224 // 8-pixel blocks, and we won't always do that (we want at least 16px
3225 // to use SSE2 optimizations, perhaps 32 for AVX2)
3226
3227 // filter edges between columns, Y plane (e.g. block1 | block2)
3228 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3229 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3230 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3231 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3232 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3233 unsigned hm = hm1 | hm2 | hm13 | hm23;
3234
3235 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3236 if (hm1 & x) {
3237 int L = *l, H = L >> 4;
3238 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3239
3240 if (col || x > 1) {
3241 if (hmask1[0] & x) {
3242 if (hmask2[0] & x) {
3243 av_assert2(l[8] == L);
3244 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3245 } else {
3246 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3247 }
3248 } else if (hm2 & x) {
3249 L = l[8];
3250 H |= (L >> 4) << 8;
3251 E |= s->filter.mblim_lut[L] << 8;
3252 I |= s->filter.lim_lut[L] << 8;
3253 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3254 [!!(hmask2[1] & x)]
3255 [0](ptr, ls_y, E, I, H);
3256 } else {
3257 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3258 [0](ptr, ls_y, E, I, H);
3259 }
3260 }
3261 } else if (hm2 & x) {
3262 int L = l[8], H = L >> 4;
3263 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3264
3265 if (col || x > 1) {
3266 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3267 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3268 }
3269 }
3270 if (hm13 & x) {
3271 int L = *l, H = L >> 4;
3272 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3273
3274 if (hm23 & x) {
3275 L = l[8];
3276 H |= (L >> 4) << 8;
3277 E |= s->filter.mblim_lut[L] << 8;
3278 I |= s->filter.lim_lut[L] << 8;
3279 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3280 } else {
3281 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3282 }
3283 } else if (hm23 & x) {
3284 int L = l[8], H = L >> 4;
3285 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3286
3287 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3288 }
3289 }
3290 }
3291
3292 // block1
3293 // filter edges between rows, Y plane (e.g. ------)
3294 // block2
3295 dst = f->data[0] + yoff;
3296 lvl = lflvl->level;
3297 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3298 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3299 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3300
3301 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3302 if (row || y) {
3303 if (vm & x) {
3304 int L = *l, H = L >> 4;
3305 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3306
3307 if (vmask[0] & x) {
3308 if (vmask[0] & (x << 1)) {
3309 av_assert2(l[1] == L);
3310 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3311 } else {
3312 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3313 }
3314 } else if (vm & (x << 1)) {
3315 L = l[1];
3316 H |= (L >> 4) << 8;
3317 E |= s->filter.mblim_lut[L] << 8;
3318 I |= s->filter.lim_lut[L] << 8;
3319 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3320 [!!(vmask[1] & (x << 1))]
3321 [1](ptr, ls_y, E, I, H);
3322 } else {
3323 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3324 [1](ptr, ls_y, E, I, H);
3325 }
3326 } else if (vm & (x << 1)) {
3327 int L = l[1], H = L >> 4;
3328 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3329
3330 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3331 [1](ptr + 8, ls_y, E, I, H);
3332 }
3333 }
3334 if (vm3 & x) {
3335 int L = *l, H = L >> 4;
3336 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3337
3338 if (vm3 & (x << 1)) {
3339 L = l[1];
3340 H |= (L >> 4) << 8;
3341 E |= s->filter.mblim_lut[L] << 8;
3342 I |= s->filter.lim_lut[L] << 8;
3343 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3344 } else {
3345 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3346 }
3347 } else if (vm3 & (x << 1)) {
3348 int L = l[1], H = L >> 4;
3349 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3350
3351 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3352 }
3353 }
3354 }
3355
3356 // same principle but for U/V planes
3357 for (p = 0; p < 2; p++) {
3358 lvl = lflvl->level;
3359 dst = f->data[1 + p] + uvoff;
3360 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3361 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3362 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3363 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3364 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3365
3366 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3367 if (col || x > 1) {
3368 if (hm1 & x) {
3369 int L = *l, H = L >> 4;
3370 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3371
3372 if (hmask1[0] & x) {
3373 if (hmask2[0] & x) {
3374 av_assert2(l[16] == L);
3375 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3376 } else {
3377 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3378 }
3379 } else if (hm2 & x) {
3380 L = l[16];
3381 H |= (L >> 4) << 8;
3382 E |= s->filter.mblim_lut[L] << 8;
3383 I |= s->filter.lim_lut[L] << 8;
3384 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3385 [!!(hmask2[1] & x)]
3386 [0](ptr, ls_uv, E, I, H);
3387 } else {
3388 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3389 [0](ptr, ls_uv, E, I, H);
3390 }
3391 } else if (hm2 & x) {
3392 int L = l[16], H = L >> 4;
3393 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3394
3395 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3396 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3397 }
3398 }
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
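            // as in the horizontal pass, levels are stored at luma
            // resolution (8 entries per 8x8-block row), so advance by two
            // rows' worth of entries after every odd 4-pixel chroma row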
            if (y & 1)
                lvl += 16;
        }
    }
}

static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
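    // e.g. with n == 10 superblocks and log2_n == 1 (two tiles), idx 0
    // yields [0, 40) and idx 1 yields [40, 80): sb_start/sb_end are
    // superblock indices, and "<< 3" converts them into 8-pixel block units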
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}

static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                                        int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    if (!ct)
        return;

    p1 = *p;
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
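    // e.g. (illustrative numbers) ct0 = 12, ct1 = 4, *p = 128, max_count = 20
    // and update_factor = 128: p2 = (12 * 256 + 8) / 16 = 192, the factor is
    // rescaled to 128 * 16 / 20 ~= 102, and the result becomes
    // 128 + (((192 - 128) * 102 + 128) >> 8) = 154, a damped step from the
    // old probability towards the observed relative frequency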
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
}

static void adapt_probs(VP9Context *s)
{
    int i, j, k, l, m;
    prob_context *p = &s->prob_ctx[s->framectxid].p;
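    // coefficient probs adapt fastest (update factor 128/256) on the first
    // inter frame after a keyframe; intra frames and all other inter frames
    // use the more conservative 112/256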
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
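    // (per node, the three adapted probabilities drive the coefficient-tree
    // decisions: end-of-block vs. more coefficients, coefficient == 0 vs.
    // non-zero, and coefficient == 1 vs. larger magnitudes)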
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // the dc band only has 3 coef contexts
                            break;

                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
                    }

    if (s->keyframe || s->intraonly) {
        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
        return;
    }

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    }

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);
    }

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
        }
    }

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
        }
    }

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);
        }
    }

    // inter modes
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);
    }

    // mv joints
    {
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);
    }

    // mv components
    for (i = 0; i < 2; i++) {
        uint8_t *pp;
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        sum -= c[1];
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        sum -= c[2] + c[3];
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        sum -= c[4] + c[5];
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        sum -= c[6];
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);
        }
    }

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }

    // uv intra modes
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }
}

static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}

static av_cold int vp9_decode_free(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[i]);
        av_frame_free(&s->frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        av_frame_free(&s->refs[i].f);
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }
    free_buffers(s);
    av_freep(&s->c_b);
    s->c_b_size = 0;

    return 0;
}


static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = ctx->priv_data;
    int res, tile_row, tile_col, i, ref, row, col;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;
    AVFrame *f;

    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
        return res;
    } else if (res == 0) {
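        // res == 0 means the header signalled a "show existing frame";
        // output the referenced frame directly without decoding any new data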
        if (!s->refs[ref].f->data[0]) {
            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
            return res;
        *got_frame = 1;
        return 0;
    }
    data += res;
    size -= res;

    if (s->frames[LAST_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
        (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
        return res;
    if (s->frames[CUR_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
        return res;
    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ls_y  = f->linesize[0];
    ls_uv = f->linesize[1];

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        if (s->refreshrefmask & (1 << i)) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
        } else {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
        }
        if (res < 0)
            return res;
    }

    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
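    // with frame threading, decode in two passes (pass 1 parses the
    // bitstream and gathers symbol counts, pass 2 reconstructs) whenever
    // this frame backward-adapts the probability context, so the adapted
    // probabilities are available to the next frame as early as possible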
    s->pass = s->uses_2pass =
        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
    if ((res = update_block_buffers(ctx)) < 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return res;
    }
    if (s->refreshctx && s->parallelmode) {
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
                break;
        }
        s->prob_ctx[s->framectxid].p = s->prob.p;
        ff_thread_finish_setup(ctx);
    }

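    // the body runs once in single-pass mode (pass == 0) and twice in
    // two-pass mode, with s->pass going 1 (parse) -> 2 (reconstruct)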
    do {
        yoff = uvoff = 0;
        s->b = s->b_base;
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);
            if (s->pass != 2) {
                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    unsigned tile_size;

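                    // every tile except the last one in the frame is
                    // prefixed with a 32-bit byte count; the last tile spans
                    // whatever data remains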
                    if (tile_col == s->tiling.tile_cols - 1 &&
                        tile_row == s->tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size) {
                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    data += tile_size;
                    size -= tile_size;
                }
            }

            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
                struct VP9Filter *lflvl_ptr = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    if (s->pass != 2) {
                        memset(s->left_partition_ctx, 0, 8);
                        memset(s->left_skip_ctx, 0, 8);
                        if (s->keyframe || s->intraonly) {
                            memset(s->left_mode_ctx, DC_PRED, 16);
                        } else {
                            memset(s->left_mode_ctx, NEARESTMV, 8);
                        }
                        memset(s->left_y_nnz_ctx, 0, 16);
                        memset(s->left_uv_nnz_ctx, 0, 16);
                        memset(s->left_segpred_ctx, 0, 8);

                        memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                    }

                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        // FIXME integrate this with the loopfilter code, i.e.
                        // zero the masks after each use, as is done for the
                        // inverse-transform coefficients
                        if (s->pass != 1) {
                            memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                        }

                        if (s->pass == 2) {
                            decode_sb_mem(ctx, row, col, lflvl_ptr,
                                          yoff2, uvoff2, BL_64X64);
                        } else {
                            decode_sb(ctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        }
                    }
                    if (s->pass != 2) {
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
                    }
                }

                if (s->pass == 1) {
                    continue;
                }

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff + 63 * ls_y,
                           8 * s->cols);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff + 31 * ls_uv,
                           4 * s->cols);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff + 31 * ls_uv,
                           4 * s->cols);
                }

                // loopfilter one row
                if (s->filter.level) {
                    yoff2 = yoff;
                    uvoff2 = uvoff;
                    lflvl_ptr = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
                    }
                }

                // FIXME maybe this can be made more fine-grained by running
                // the loopfilter per block instead of after each sbrow; that
                // would also make preparing the left edge for intra
                // prediction easier
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
            }
        }

        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            adapt_probs(s);
            ff_thread_finish_setup(ctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
    }

    if (!s->invisible) {
        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
            return res;
        *got_frame = 1;
    }

    return 0;
}

static void vp9_decode_flush(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++)
        vp9_unref_frame(ctx, &s->frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(ctx, &s->refs[i]);
}

static int init_frames(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}

static av_cold int vp9_decode_init(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    ctx->internal->allocate_progress = 1;
    ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    ff_vp9dsp_init(&s->dsp);
    ff_videodsp_init(&s->vdsp, 8);
    s->filter.sharpness = -1;

    return init_frames(ctx);
}

static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}

static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, res;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // detect size changes in other threads
    if (s->intra_pred_data[0] &&
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
        free_buffers(s);
    }

    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
                return res;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->next_refs[i].f->data[0]) {
            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
                return res;
        }
    }

    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->uses_2pass = ssrc->uses_2pass;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    if (ssrc->segmentation.enabled) {
        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
               sizeof(s->segmentation.feat));
    }

    return 0;
}

AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
};