/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dsp.h"
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342
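/* VP9_SYNCCODE is the 24-bit frame sync code, read below via
 * get_bits_long(&s->gb, 24); it appears in the bitstream as the byte
 * sequence 0x49 0x83 0x42. */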
typedef struct VP9Frame {
    ThreadFrame tf;
    AVBufferRef *extradata;
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv;
} VP9Frame;
struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;
typedef struct VP9Context {
    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block *b_base, *b;
    int pass, uses_2pass, last_uses_2pass;
    int row, row7, col, col7;
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;

    // bitstream header
    uint8_t profile;
    uint8_t keyframe, last_keyframe;
    uint8_t invisible;
    uint8_t use_last_frame_mvs;
    uint8_t errorres;
    uint8_t colorspace;
    uint8_t fullrange;
    uint8_t intraonly;
    uint8_t resetctx;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t fixcompref;
    uint8_t refreshctx;
    uint8_t parallelmode;
    uint8_t framectxid;
    uint8_t refidx[3];
    uint8_t signbias[3];
    uint8_t varcompref[2];
    ThreadFrame refs[8], next_refs[8];
#define CUR_FRAME 0
#define LAST_FRAME 1
    VP9Frame frames[2];

    struct {
        uint8_t level;
        int8_t sharpness;
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter;
    struct {
        uint8_t enabled;
        int8_t mode[2];
        int8_t ref[4];
    } lf_delta;
    uint8_t yac_qi;
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t lossless;
#define MAX_SEGMENT 8
    struct {
        uint8_t enabled;
        uint8_t temporal;
        uint8_t absolute_vals;
        uint8_t update_map;
        struct {
            uint8_t q_enabled;
            uint8_t lf_enabled;
            uint8_t ref_enabled;
            uint8_t skip_enabled;
            uint8_t ref_val;
            int16_t q_val;
            int8_t lf_val;
            int16_t qmul[2][2];
            uint8_t lflvl[4][2];
        } feat[MAX_SEGMENT];
    } segmentation;
    struct {
        unsigned log2_tile_cols, log2_tile_rows;
        unsigned tile_cols, tile_rows;
        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    } tiling;
    unsigned sb_cols, sb_rows, rows, cols;

    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
        uint8_t seg[7];
        uint8_t segpred[3];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx;     // 1bit
    uint8_t *above_txfm_ctx;     // 2bit
    uint8_t *above_segpred_ctx;  // 1bit
    uint8_t *above_intra_ctx;    // 1bit
    uint8_t *above_comp_ctx;     // 1bit
    uint8_t *above_ref_ctx;      // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71 * 80];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
} VP9Context;
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
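/* bwh_tab[0] is the block width/height in 4x4-block units and bwh_tab[1]
 * in 8x8-block units, both indexed by BS_* from BS_64x64 down to BS_4x4
 * (e.g. a 64x64 block is 16x16 units of 4x4, or 8x8 units of 8x8). */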
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    // retain segmentation map if it doesn't update
    if (s->segmentation.enabled && !s->segmentation.update_map &&
        !s->intraonly && !s->keyframe && !s->errorres) {
        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
    }

    return 0;
}
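/* The extradata buffer allocated above holds the per-8x8-block metadata:
 * sz bytes of segmentation map followed by sz struct VP9mvrefPair entries,
 * where sz = 64 * sb_cols * sb_rows is the number of 8x8 blocks rounded up
 * to whole 64x64 superblocks. */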
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
}
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);
        return AVERROR(ENOMEM);
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;

    return 0;
}
static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    av_assert0(w > 0 && h > 0);

    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
        return 0;

    ctx->width  = w;
    ctx->height = h;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,         64);
    assign(s->intra_pred_data[1],  uint8_t *,         32);
    assign(s->intra_pred_data[2],  uint8_t *,         32);
    assign(s->above_y_nnz_ctx,     uint8_t *,         16);
    assign(s->above_mode_ctx,      uint8_t *,         16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],      16);
    assign(s->above_partition_ctx, uint8_t *,          8);
    assign(s->above_skip_ctx,      uint8_t *,          8);
    assign(s->above_txfm_ctx,      uint8_t *,          8);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,          8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,          8);
    assign(s->above_segpred_ctx,   uint8_t *,          8);
    assign(s->above_intra_ctx,     uint8_t *,          8);
    assign(s->above_comp_ctx,      uint8_t *,          8);
    assign(s->above_ref_ctx,       uint8_t *,          8);
    assign(s->above_filter_ctx,    uint8_t *,          8);
    assign(s->lflvl,               struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    return 0;
}
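/* Note on the arena sizing above: the 240 bytes per sb_col is the sum of
 * the per-column uint8_t strips handed out by assign(): 64+32+32
 * (intra_pred_data) + 16+16 (y_nnz, mode) + 10*8 (partition, skip, txfm,
 * two uv_nnz, segpred, intra, comp, ref, filter); lflvl and above_mv_ctx
 * are accounted for separately in the malloc size. */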
static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    if (s->uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
        s->uveob_base[0] = s->eob_base + 256 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
        s->uveob_base[0] = s->eob_base + 256;
        s->uveob_base[1] = s->uveob_base[0] + 64;
    }
    s->block_alloc_using_2pass = s->uses_2pass;

    return 0;
}
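/* Buffer math above: (64*64 + 128) * 3 bytes per superblock = 12288 bytes
 * of int16_t coefficients (one 64x64 luma plane plus two 32x32 chroma
 * planes) followed by 384 EOB bytes (256 luma + 2*64 chroma). */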
// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
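/* e.g. with n = 4, the bit sequence 0101 followed by sign bit 1 decodes to
 * -5; a sign bit of 0 would give +5. */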
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
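/* Worked example: with m = 10, v = 0, 1, 2, 3, 4 decodes to 10, 9, 11, 8,
 * 12 (alternating around m), while any v > 2 * m maps to itself, so values
 * close to m get the short codes. */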
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
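/* The three unary-prefixed branches above decode d in [0,16), [16,32) and
 * [32,64); the final branch reads 7 bits plus, for values >= 65, one extra
 * bit to cover the remainder of the range. inv_map_table[] then maps the
 * first 20 indices to the coarse grid 7, 20, 33, ..., 254 (the 'cheap,
 * rough' updates) and the later indices to the exact in-between values
 * (the 'fine, exact' updates). */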
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_uses_2pass = s->uses_2pass;
    s->last_keyframe   = s->keyframe;
    s->keyframe        = !get_bits1(&s->gb);
    last_invisible     = s->invisible;
    s->invisible       = !get_bits1(&s->gb);
    s->errorres        = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx  = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0]      = get_bits(&s->gb, 3);
            s->signbias[0]    = get_bits1(&s->gb);
            s->refidx[1]      = get_bits(&s->gb, 3);
            s->signbias[1]    = get_bits1(&s->gb);
            s->refidx[2]      = get_bits(&s->gb, 3);
            s->signbias[2]    = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref    = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref    = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref    = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }
    s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid   = c = get_bits(&s->gb, 2);
    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi      = get_bits(&s->gb, 8);
    s->ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                     s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
            }
        }
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_ERROR,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            return AVERROR_INVALIDDATA;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled    = 0;
        s->segmentation.feat[0].lf_enabled   = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled  = 0;
    }
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }
    /* tiling info */
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }
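    /* Note: the loops above bound log2_tile_cols so that every tile is at
     * most 64 superblocks wide (lower bound) and, where the frame allows it,
     * at least 4 superblocks wide (upper bound via max), matching the VP9
     * tile-size limits; the bits read in between pick a value in that range. */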
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // eob[0])
    s->prob.p = s->prob_ctx[c].p;
    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }
    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }
    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }
        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}
static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
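    /* Each entry is a {col, row} offset (in 8x8-block units) of a candidate
     * neighbour relative to the current block's position, as consumed by
     * the scan loops below (p[i][0] + col, p[i][1] + row). */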
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;

#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }

#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            clamp_mv(&tmp, &mv, s); \
            m = AV_RN32A(&tmp); \
            if (!idx) { \
                AV_WN32A(pmv, m); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                AV_WN32A(pmv, m); \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];

            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];

            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }

    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        if (!s->last_uses_2pass)
            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }

#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];

        // no need to await_progress, because we already did that above
        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
#undef INVALID_MV
#undef RETURN_MV
#undef RETURN_SCALE_MV
}
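/* Note on the idx parameter: for idx == 0 the RETURN_* macros return the
 * first usable candidate MV; for idx == 1 the first match is remembered in
 * 'mem' and the search continues until a second, distinct MV turns up (the
 * NEARMV candidate). If nothing qualifies, pmv is zeroed at the end. */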
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 64 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
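/* Layout of the decoded magnitude n (eighth-pel units): bit 0 is the
 * high-precision eighth-pel bit, bits 1-2 the quarter-pel fraction, and the
 * higher bits the integer part (offset by 64 << class for classes >= 1);
 * the component is returned as sign ? -(n + 1) : n + 1. */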
static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *b = s->b;

    if (mode == ZEROMV) {
        memset(mv, 0, sizeof(*mv) * 2);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr,     v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}
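/* setctx_2d() broadcasts the byte value v over a w x h region (w of 1, 2,
 * 4 or 8 bytes per row, using aligned multi-byte stores); it is used below
 * to splat per-block values such as the segment id into the frame-wide
 * per-8x8-block maps. */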
static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
    int vref, filter_id;
    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        if (!s->errorres) {
            int pred = 8, x;
            uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;

            if (!s->last_uses_2pass)
                ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
            for (y = 0; y < h4; y++)
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if (s->segmentation.enabled &&
        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  w4, h4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->segmentation.enabled &&
        s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;

        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        if (max_tx == TX_32X32) {
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
        } else if (max_tx == TX_16X16) {
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
        } else if (max_tx == TX_8X8) {
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
        } else {
            b->tx = TX_4X4;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }
    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                 vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
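        /* Indexed as [above_mode_ctx][left_mode_ctx]; the stored modes 0-9
         * are intra modes and 10-13 the inter modes NEARESTMV..NEWMV, so any
         * intra/intra combination (the 10x10 top-left quadrant) maps to
         * context 6, matching the 7 contexts of mv_mode[7][...]. */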
        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }
1596 // FIXME probably cache a few variables here to prevent repetitive
1597 // memory accesses below
1598 if (b
->comp
) /* two references */ {
1599 int fix_idx
= s
->signbias
[s
->fixcompref
], var_idx
= !fix_idx
, c
, bit
;
1601 b
->ref
[fix_idx
] = s
->fixcompref
;
1602 // FIXME can this codeblob be replaced by some sort of LUT?
1605 if (s
->above_intra_ctx
[col
]) {
1606 if (s
->left_intra_ctx
[row7
]) {
1609 c
= 1 + 2 * (s
->left_ref_ctx
[row7
] != s
->varcompref
[1]);
1611 } else if (s
->left_intra_ctx
[row7
]) {
1612 c
= 1 + 2 * (s
->above_ref_ctx
[col
] != s
->varcompref
[1]);
1614 int refl
= s
->left_ref_ctx
[row7
], refa
= s
->above_ref_ctx
[col
];
1616 if (refl
== refa
&& refa
== s
->varcompref
[1]) {
1618 } else if (!s
->left_comp_ctx
[row7
] && !s
->above_comp_ctx
[col
]) {
1619 if ((refa
== s
->fixcompref
&& refl
== s
->varcompref
[0]) ||
1620 (refl
== s
->fixcompref
&& refa
== s
->varcompref
[0])) {
1623 c
= (refa
== refl
) ? 3 : 1;
1625 } else if (!s
->left_comp_ctx
[row7
]) {
1626 if (refa
== s
->varcompref
[1] && refl
!= s
->varcompref
[1]) {
1629 c
= (refl
== s
->varcompref
[1] &&
1630 refa
!= s
->varcompref
[1]) ? 2 : 4;
1632 } else if (!s
->above_comp_ctx
[col
]) {
1633 if (refl
== s
->varcompref
[1] && refa
!= s
->varcompref
[1]) {
1636 c
= (refa
== s
->varcompref
[1] &&
1637 refl
!= s
->varcompref
[1]) ? 2 : 4;
1640 c
= (refl
== refa
) ? 4 : 2;
1644 if (s
->above_intra_ctx
[col
]) {
1646 } else if (s
->above_comp_ctx
[col
]) {
1647 c
= 4 * (s
->above_ref_ctx
[col
] != s
->varcompref
[1]);
1649 c
= 3 * (s
->above_ref_ctx
[col
] != s
->varcompref
[1]);
1652 } else if (have_l
) {
1653 if (s
->left_intra_ctx
[row7
]) {
1655 } else if (s
->left_comp_ctx
[row7
]) {
1656 c
= 4 * (s
->left_ref_ctx
[row7
] != s
->varcompref
[1]);
1658 c
= 3 * (s
->left_ref_ctx
[row7
] != s
->varcompref
[1]);
1663 bit
= vp56_rac_get_prob(&s
->c
, s
->prob
.p
.comp_ref
[c
]);
1664 b
->ref
[var_idx
] = s
->varcompref
[bit
];
1665 s
->counts
.comp_ref
[c
][bit
]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }
        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = vp9_filter_lut[filter_id];
        } else {
            b->filter = s->filtermode;
        }
        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }
        vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
    }

#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
    default: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                         break; \
    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    default: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif

    switch (bwh_tab[1][b->bs][0]) {
#define SET_CTXS(dir, off, n) \
    do { \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;
    }
    switch (bwh_tab[1][b->bs][1]) {
    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
    }
#undef SPLAT_CTX
#undef SET_CTXS
    if (!s->keyframe && !s->intraonly) {
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }
    }
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;
        struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] =
                mv[x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = b->ref[1];
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = -1;
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
            }
        }
    }
}
// FIXME merge cnt/eob arguments?
static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                        int is_tx32x32, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            // fill in p[3-10] (model fill) - only once per frame for each pos
            if (!tp[3])
                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);

            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    val  = 5 + vp56_rac_get_prob(c, 159);
                } else {
                    val  = 7 + (vp56_rac_get_prob(c, 165) << 1);
                    val +=      vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
                        val +=      (vp56_rac_get_prob(c, 148) << 1);
                        val +=       vp56_rac_get_prob(c, 140);
                    } else {
                        val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
                        val +=      (vp56_rac_get_prob(c, 155) << 2);
                        val +=      (vp56_rac_get_prob(c, 140) << 1);
                        val +=       vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    val  = 35 + (vp56_rac_get_prob(c, 180) << 4);
                    val +=      (vp56_rac_get_prob(c, 157) << 3);
                    val +=      (vp56_rac_get_prob(c, 141) << 2);
                    val +=      (vp56_rac_get_prob(c, 134) << 1);
                    val +=       vp56_rac_get_prob(c, 130);
                } else {
                    val  = 67 + (vp56_rac_get_prob(c, 254) << 13);
                    val +=      (vp56_rac_get_prob(c, 254) << 12);
                    val +=      (vp56_rac_get_prob(c, 254) << 11);
                    val +=      (vp56_rac_get_prob(c, 252) << 10);
                    val +=      (vp56_rac_get_prob(c, 249) << 9);
                    val +=      (vp56_rac_get_prob(c, 243) << 8);
                    val +=      (vp56_rac_get_prob(c, 230) << 7);
                    val +=      (vp56_rac_get_prob(c, 196) << 6);
                    val +=      (vp56_rac_get_prob(c, 177) << 5);
                    val +=      (vp56_rac_get_prob(c, 153) << 4);
                    val +=      (vp56_rac_get_prob(c, 140) << 3);
                    val +=      (vp56_rac_get_prob(c, 133) << 2);
                    val +=      (vp56_rac_get_prob(c, 130) << 1);
                    val +=       vp56_rac_get_prob(c, 129);
                }
            }
        }

        if (!--band_left)
            band_left = band_counts[++band];
        if (is_tx32x32)
            coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
        else
            coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}
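
/* Token value ranges decoded above, derived from the offsets and bit counts
 * in the code: the "2, 3, 4" branch yields 2-4, cat1/2 yield 5-6 and 7-10,
 * and the cat3-6 chain yields 11-18, 19-34, 35-66 and 67-16450 respectively
 * (67 plus 14 extra bits, i.e. 67 + 16383 at most). The extra bits use the
 * fixed per-category probabilities (159, 165/145, ...) written inline. */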
static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                           unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                           uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                           const int16_t (*nb)[2], const int16_t *band_counts,
                           const int16_t *qmul)
{
    return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                             unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                             uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                             const int16_t (*nb)[2], const int16_t *band_counts,
                             const int16_t *qmul)
{
    return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
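
/* These two thin wrappers presumably exist so the av_always_inline body above
 * is instantiated twice with is_tx32x32 constant-folded: the compiler can drop
 * the "/ 2" scaling branch entirely from decode_coeffs_b() and keep it in
 * decode_coeffs_b32(), without duplicating the token parser in the source. */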
static void decode_coeffs(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;
    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
    int tx = 4 * s->lossless + b->tx;
    const int16_t * const *yscans = vp9_scans[tx];
    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts  = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
                                     c, e, p, a[x] + l[y], yscans[txtp], \
                                     ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            if (step >= 4) { \
                AV_WN16A(&s->eob[n], res); \
            } else { \
                s->eob[n] = res; \
            } \
        } \
    }

#define SPLAT(la, end, step, cond) \
    if (step == 2) { \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
        if (cond) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        } \
    } else /* step == 8 */ { \
        if (cond) { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            } else { \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n],     v32); \
                    AV_WN32A(&la[n + 4], v32); \
                } \
            } \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
        } \
    }
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)
    /* y tokens */
    switch (b->tx) {
    case TX_4X4:
        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
        break;
    case TX_8X8:
        MERGE_CTX(2, AV_RN16A);
        DECODE_Y_COEF_LOOP(2, 0,);
        SPLAT_CTX(2);
        break;
    case TX_16X16:
        MERGE_CTX(4, AV_RN32A);
        DECODE_Y_COEF_LOOP(4, 0,);
        SPLAT_CTX(4);
        break;
    case TX_32X32:
        MERGE_CTX(8, AV_RN64A);
        DECODE_Y_COEF_LOOP(8, 0, 32);
        SPLAT_CTX(8);
        break;
    }
#define DECODE_UV_COEF_LOOP(step) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
                                  16 * step * step, c, e, p, a[x] + l[y], \
                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            if (step >= 4) { \
                AV_WN16A(&s->uveob[pl][n], res); \
            } else { \
                s->uveob[pl][n] = res; \
            } \
        } \
    }

    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
    w4 >>= 1;
    end_x >>= 1;
    h4 >>= 1;
    end_y >>= 1;
    for (pl = 0; pl < 2; pl++) {
        a = &s->above_uv_nnz_ctx[pl][col];
        l = &s->left_uv_nnz_ctx[pl][row & 7];
        switch (b->uvtx) {
        case TX_4X4:
            DECODE_UV_COEF_LOOP(1);
            break;
        case TX_8X8:
            MERGE_CTX(2, AV_RN16A);
            DECODE_UV_COEF_LOOP(2);
            SPLAT_CTX(2);
            break;
        case TX_16X16:
            MERGE_CTX(4, AV_RN32A);
            DECODE_UV_COEF_LOOP(4);
            SPLAT_CTX(4);
            break;
        case TX_32X32:
            MERGE_CTX(8, AV_RN64A);
            // a 64x64 (max) uv block can only ever contain one tx32x32 block,
            // so there is no need to loop
            res = decode_coeffs_b32(&s->c, s->uvblock[pl],
                                    1024, c, e, p, a[0] + l[0],
                                    uvscan, uvnb, uv_band_counts, qmul[1]);
            a[0] = l[0] = !!res;
            AV_WN16A(&s->uveob[pl][0], res);
            break;
        }
    }
}
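
/* Sanity check on the band_counts[] table above: each row sums to the
 * coefficient count of its transform size, e.g. for 4x4:
 * 1 + 2 + 3 + 4 + 3 + (16 - 13) = 16. In the token loop, band_left counts
 * down the coefficients remaining in the current frequency band, and
 * band_counts[++band] switches to the next band's probability set once it
 * hits zero. */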
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
                                             uint8_t *l, int col, int x, int w,
                                             int row, int y, enum TxfmMode tx,
                                             int p)
{
    int have_top   = row > 0 || y > 0;
    int have_left  = col > s->tiling.tile_col_start || x > 0;
    int have_right = x < w - 1;
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
                                   { DC_127_PRED,          VERT_PRED } },
        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED },
                                   { HOR_PRED,             HOR_PRED } },
        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED },
                                   { LEFT_DC_PRED,         DC_PRED } },
        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED },
                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED } },
        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED },
                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED } },
        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED },
                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED } },
        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED },
                                   { DC_127_PRED,          VERT_LEFT_PRED } },
        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED },
                                   { HOR_UP_PRED,          HOR_UP_PRED } },
        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED },
                                   { HOR_PRED,             TM_VP8_PRED } },
    };
    static const struct {
        uint8_t needs_left:1;
        uint8_t needs_top:1;
        uint8_t needs_topleft:1;
        uint8_t needs_topright:1;
    } edges[N_INTRA_PRED_MODES] = {
        [VERT_PRED]            = { .needs_top  = 1 },
        [HOR_PRED]             = { .needs_left = 1 },
        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
        [HOR_UP_PRED]          = { .needs_left = 1 },
        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [LEFT_DC_PRED]         = { .needs_left = 1 },
        [TOP_DC_PRED]          = { .needs_top  = 1 },
        [DC_128_PRED]          = { 0 },
        [DC_127_PRED]          = { 0 },
        [DC_129_PRED]          = { 0 }
    };

    av_assert2(mode >= 0 && mode < 10);
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        uint8_t *top, *topleft;
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
            n_px_need_tr = 4;

        // if top of sb64-row, use s->intra_pred_data[] instead of
        // dst[-stride] for intra prediction (it contains pre- instead of
        // post-loopfilter data)
        if (have_top) {
            top = !(row & 7) && !y ?
                s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
            if (have_left)
                topleft = !(row & 7) && !y ?
                    s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                    &dst_inner[-stride_inner];
        }

        if (have_top &&
            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {
            *a = top;
        } else {
            if (have_top) {
                if (n_px_need <= n_px_have) {
                    memcpy(*a, top, n_px_need);
                } else {
                    memcpy(*a, top, n_px_have);
                    memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
                           n_px_need - n_px_have);
                }
            } else {
                memset(*a, 127, n_px_need);
            }
            if (edges[mode].needs_topleft) {
                if (have_left && have_top) {
                    (*a)[-1] = topleft[-1];
                } else {
                    (*a)[-1] = have_top ? 129 : 127;
                }
            }
            if (tx == TX_4X4 && edges[mode].needs_topright) {
                if (have_top && have_right &&
                    n_px_need + n_px_need_tr <= n_px_have) {
                    memcpy(&(*a)[4], &top[4], 4);
                } else {
                    memset(&(*a)[4], (*a)[3], 4);
                }
            }
        }
    }
    if (edges[mode].needs_left) {
        if (have_left) {
            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (n_px_need <= n_px_have) {
                for (i = 0; i < n_px_need; i++)
                    l[n_px_need - 1 - i] = dst[i * stride - 1];
            } else {
                for (i = 0; i < n_px_have; i++)
                    l[n_px_need - 1 - i] = dst[i * stride - 1];
                memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
            }
        } else {
            memset(l, 129, 4 << tx);
        }
    }

    return mode;
}
static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
    int uvstep1d = 1 << b->uvtx, p;
    uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
    LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
    LOCAL_ALIGNED_32(uint8_t, l, [32]);

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
             ptr_r += 4 * step1d, n += step) {
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            uint8_t *a = &a_buf[32];
            enum TxfmType txtp = vp9_intra_txfm_type[mode];
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

            mode = check_intra_mode(s, mode, &a, ptr_r,
                                    s->frames[CUR_FRAME].tf.f->linesize[0],
                                    ptr, s->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0);
            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
                                           s->block + 16 * n, eob);
        }
        dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
        dst   += 4 * step1d * s->y_stride;
    }

    // U/V
    w4 >>= 1;
    end_x >>= 1;
    end_y >>= 1;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst   = s->dst[1 + p];
        dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
                 ptr_r += 4 * uvstep1d, n += step) {
                int mode = b->uvmode;
                uint8_t *a = &a_buf[16];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                mode = check_intra_mode(s, mode, &a, ptr_r,
                                        s->frames[CUR_FRAME].tf.f->linesize[1],
                                        ptr, s->uv_stride, l,
                                        col, x, w4, row, y, b->uvtx, p + 1);
                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                    s->uvblock[p] + 16 * n, eob);
            }
            dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
            dst   += 4 * uvstep1d * s->uv_stride;
        }
    }
}
static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                         uint8_t *dst, ptrdiff_t dst_stride,
                                         const uint8_t *ref, ptrdiff_t ref_stride,
                                         ThreadFrame *ref_frame,
                                         ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                         int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y, th;

    y += my >> 3;
    x += mx >> 3;
    ref += y * ref_stride + x;
    mx &= 7;
    my &= 7;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> 6;
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref - !!my * 3 * ref_stride - !!mx * 3,
                                 80, ref_stride,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        ref_stride = 80;
    }
    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                           uint8_t *dst_u, uint8_t *dst_v,
                                           ptrdiff_t dst_stride,
                                           const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                           const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                           ThreadFrame *ref_frame,
                                           ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                           int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y, th;

    y += my >> 4;
    x += mx >> 4;
    ref_u += y * src_stride_u + x;
    ref_v += y * src_stride_v + x;
    mx &= 15;
    my &= 15;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> 5;
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
                                 80, src_stride_u,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);

        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
                                 80, src_stride_v,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    }
}
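
/* Chroma counterpart notes: the planes are subsampled by two, so the same
 * progress threshold uses ">> 5" (32 chroma rows per 64-pixel luma sbrow),
 * and the mv fraction stays in 1/16th-chroma-pel units (mx & 15) rather than
 * being doubled as in mc_luma_dir(). Both edge emulations share
 * edge_emu_buffer, which is presumably why U is interpolated before V is
 * emulated. */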
static void inter_recon(AVCodecContext *ctx)
{
    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
    AVFrame *ref1 = tref1->f, *ref2;
    int w1 = ref1->width, h1 = ref1->height, w2, h2;
    ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;

    if (b->comp) {
        tref2 = &s->refs[s->refidx[b->ref[1]]];
        ref2 = tref2->f;
        w2 = ref2->width;
        h2 = ref2->height;
    }

    // y inter pred
    if (b->bs > BS_8x8) {
        if (b->bs == BS_8x4) {
            mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
            mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
                        s->dst[0] + 4 * ls_y, ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
                mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
                            s->dst[0] + 4 * ls_y, ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
            }
        } else if (b->bs == BS_4x8) {
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
            }
        } else {
            av_assert2(b->bs == BS_4x4);

            // FIXME if two horizontally adjacent blocks have the same MV,
            // do a w8 instead of a w4 call
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
                        s->dst[0] + 4 * ls_y, ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
                        s->dst[0] + 4 * ls_y + 4, ls_y,
                        ref1->data[0], ref1->linesize[0], tref1,
                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
                            s->dst[0] + 4 * ls_y, ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
                            s->dst[0] + 4 * ls_y + 4, ls_y,
                            ref2->data[0], ref2->linesize[0], tref2,
                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
            }
        }
    } else {
        int bwl = bwlog_tab[0][b->bs];
        int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;

        mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
                    ref1->data[0], ref1->linesize[0], tref1,
                    row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);

        if (b->comp)
            mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
                        ref2->data[0], ref2->linesize[0], tref2,
                        row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
    }

    // uv inter pred
    {
        int bwl = bwlog_tab[1][b->bs];
        int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
        VP56mv mvuv;

        w1 = (w1 + 1) >> 1;
        h1 = (h1 + 1) >> 1;
        if (b->comp) {
            w2 = (w2 + 1) >> 1;
            h2 = (h2 + 1) >> 1;
        }
        if (b->bs > BS_8x8) {
            mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
            mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
        } else {
            mvuv = b->mv[0][0];
        }

        mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
                      s->dst[1], s->dst[2], ls_uv,
                      ref1->data[1], ref1->linesize[1],
                      ref1->data[2], ref1->linesize[2], tref1,
                      row << 2, col << 2, &mvuv, bw, bh, w1, h1);

        if (b->comp) {
            if (b->bs > BS_8x8) {
                mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
                mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
            } else {
                mvuv = b->mv[0][1];
            }
            mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
                          s->dst[1], s->dst[2], ls_uv,
                          ref2->data[1], ref2->linesize[1],
                          ref2->data[2], ref2->linesize[2], tref2,
                          row << 2, col << 2, &mvuv, bw, bh, w2, h2);
        }
    }

    if (!b->skip) {
        /* mostly copied from intra_recon() */

        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = s->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
                                                  s->block + 16 * n, eob);
            }
            dst += 4 * s->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= 1;
        end_y >>= 1;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                        s->uvblock[p] + 16 * n, eob);
                }
                dst += 4 * uvstep1d * s->uv_stride;
            }
        }
    }
}
static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && is_uv) {
        if (h == 1) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == 1) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        int m_col_odd = (t << (w - 1)) - t;

        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        if (is_uv) {
            int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 7);

                lflvl->mask[is_uv][0][y][1] |= m_row_8;
                lflvl->mask[is_uv][0][y][2] |= m_row_4;
                // for odd lines, if the odd col is not being filtered,
                // skip odd row also:
                // .---. <-- a
                // |   |
                // |___| <-- b
                // ^   ^
                // c   d
                //
                // if a/c are even row/col and b/d are odd, and d is skipped,
                // e.g. right edge of size-66x66.webm, then skip b also (bug)
                if ((col_end & 1) && (y & 1)) {
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
                } else {
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
                }
            }
        } else {
            int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 3);

                lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
                lflvl->mask[is_uv][0][y][2] |= m_row_4;
                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
                lflvl->mask[is_uv][0][y][3] |= m_col;
                lflvl->mask[is_uv][1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            int l2 = tx + is_uv - 1, step1d = 1 << l2;
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int m_row = m_col & masks[l2];

            // at odd UV col/row tx16/tx32 loopfilter edges, force the
            // 8-wide loopfilter to prevent going off the visible edge.
            if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    lflvl->mask[is_uv][0][y][0] |= m_row_16;
                    lflvl->mask[is_uv][0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    lflvl->mask[is_uv][0][y][mask_id] |= m_row;
            }

            if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    lflvl->mask[is_uv][1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    lflvl->mask[is_uv][1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    lflvl->mask[is_uv][1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            int mask_id;

            mask_id = (tx == TX_8X8) || (is_uv && h == 1);
            lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (is_uv && w == 1);
            for (y = row_and_7; y < h + row_and_7; y++)
                lflvl->mask[is_uv][0][y][mask_id] |= t;
        } else if (is_uv) {
            int t8 = t & 0x01, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;
            }
            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
        } else {
            int t8 = t & 0x11, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;
            }
            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
        }
    }
}
static void decode_b(AVCodecContext *ctx, int row, int col,
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                     enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    enum BlockSize bs = bl * 3 + bp;
    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->frames[CUR_FRAME].tf.f;

    s->row = row;
    s->row7 = row & 7;
    s->col = col;
    s->col7 = col & 7;
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
    if (s->pass < 2) {
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(ctx);
        b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));

        if (!b->skip) {
            decode_coeffs(ctx);
        } else {
            int row7 = s->row7;

#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
#define SPLAT_ZERO_YUV(dir, var, off, n) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
    } while (0)

            switch (w4) {
            case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
            case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
            case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
            case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
            }
            switch (h4) {
            case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
            case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
            case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
            case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
            }
        }
        if (s->pass == 1) {
            s->b++;
            s->block += w4 * h4 * 64;
            s->uvblock[0] += w4 * h4 * 16;
            s->uvblock[1] += w4 * h4 * 16;
            s->eob += 4 * w4 * h4;
            s->uveob[0] += w4 * h4;
            s->uveob[1] += w4 * h4;
            return;
        }
    }

    // emulate overhangs if the stride of the target buffer can't hold them.
    // This allows us to support emu-edge and so on even if we have large
    // block overhangs
    emu[0] = (col + w4) * 8 > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = (col + w4) * 4 > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        s->dst[0] = s->tmp_y;
        s->y_stride = 64;
    } else {
        s->dst[0] = f->data[0] + yoff;
        s->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        s->dst[1] = s->tmp_uv[0];
        s->dst[2] = s->tmp_uv[1];
        s->uv_stride = 32;
    } else {
        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
        s->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        intra_recon(ctx, yoff, uvoff);
    } else {
        inter_recon(ctx);
    }
    if (emu[0]) {
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
                                         s->tmp_y + o, 64, h, 0, 0);
                o += bw;
            }
        }
    }
    if (emu[1]) {
        int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;

        for (n = 1; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
                                         s->tmp_uv[0] + o, 32, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
                                         s->tmp_uv[1] + o, 32, h, 0, 0);
                o += bw;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->filter.level &&
        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                    [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        mask_edges(lflvl, 1, row7, col7, x_end, y_end,
                   s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                   s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                   b->uvtx, skip_inter);

        if (!s->filter.lim_lut[lvl]) {
            int sharp = s->filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter.lim_lut[lvl] = limit;
            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    if (s->pass == 2) {
        s->b++;
        s->block += w4 * h4 * 64;
        s->uvblock[0] += w4 * h4 * 16;
        s->uvblock[1] += w4 * h4 * 16;
        s->eob += 4 * w4 * h4;
        s->uveob[0] += w4 * h4;
        s->uveob[1] += w4 * h4;
    }
}
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8;
                uvoff += hbs * 4;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
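
/* The partition context c above packs one bit each from the above and left
 * partition contexts for this block level, selecting one of four probability
 * sets per level. At the frame's right or bottom edge, only a split or the
 * matching half partition is legal, so a single branch probability (p[1] or
 * p[2]) is read there instead of the full partition tree. */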
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8;
            uvoff += hbs * 4;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
                              uvoff + 4 * hbs, bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
            } else {
                yoff  += hbs * 8;
                uvoff += hbs * 4;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = ctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    int y, x, p;

    // FIXME how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          2
    dst = f->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) {
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            if (y & 1)
                lvl += 16;
        }
    }
}
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}
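
/* Example: with log2_n = 1 (two tile columns) and n = 9 superblock columns,
 * idx 0 covers sb 0-3 and idx 1 covers sb 4-8, i.e. *start/*end of 0/32 and
 * 32/72 in 8-pixel block units after the << 3. */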
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                                        int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    if (!ct)
        return;

    p1 = *p;
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
}
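
/* Numeric example: p1 = 128, ct0 = 30, ct1 = 10 gives p2 = 7700 / 40 = 192;
 * with max_count = 20 and update_factor = 128 the count saturates, so
 * *p = 128 + (((192 - 128) * 128 + 128) >> 8) = 160, i.e. the stored
 * probability moves halfway towards the observed frequency. */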
static void adapt_probs(VP9Context *s)
{
    int i, j, k, l, m;
    prob_context *p = &s->prob_ctx[s->framectxid].p;
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // dc only has 3 pt
                            break;

                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
                    }

    if (s->keyframe || s->intraonly) {
        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
        return;
    }

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    }

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);
    }

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
        }
    }

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
        }
    }

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);
        }
    }

    // inter modes
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);
    }

    // mv joints
    {
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);
    }

    // mv components
    for (i = 0; i < 2; i++) {
        uint8_t *pp;
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        sum -= c[1];
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        sum -= c[2] + c[3];
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        sum -= c[4] + c[5];
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        sum -= c[6];
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);
        }
    }

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }

    // uv intra modes
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }
}
static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}
static av_cold int vp9_decode_free(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[i]);
        av_frame_free(&s->frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        av_frame_free(&s->refs[i].f);
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }
    free_buffers(s);

    return 0;
}
3731 static int vp9_decode_frame(AVCodecContext
*ctx
, void *frame
,
3732 int *got_frame
, AVPacket
*pkt
)
3734 const uint8_t *data
= pkt
->data
;
3735 int size
= pkt
->size
;
3736 VP9Context
*s
= ctx
->priv_data
;
3737 int res
, tile_row
, tile_col
, i
, ref
, row
, col
;
3738 ptrdiff_t yoff
, uvoff
, ls_y
, ls_uv
;
3741 if ((res
= decode_frame_header(ctx
, data
, size
, &ref
)) < 0) {
3743 } else if (res
== 0) {
3744 if (!s
->refs
[ref
].f
->data
[0]) {
3745 av_log(ctx
, AV_LOG_ERROR
, "Requested reference %d not available\n", ref
);
3746 return AVERROR_INVALIDDATA
;
3748 if ((res
= av_frame_ref(frame
, s
->refs
[ref
].f
)) < 0)
3756 if (s
->frames
[LAST_FRAME
].tf
.f
->data
[0])
3757 vp9_unref_frame(ctx
, &s
->frames
[LAST_FRAME
]);
3758 if (!s
->keyframe
&& s
->frames
[CUR_FRAME
].tf
.f
->data
[0] &&
3759 (res
= vp9_ref_frame(ctx
, &s
->frames
[LAST_FRAME
], &s
->frames
[CUR_FRAME
])) < 0)
3761 if (s
->frames
[CUR_FRAME
].tf
.f
->data
[0])
3762 vp9_unref_frame(ctx
, &s
->frames
[CUR_FRAME
]);
3763 if ((res
= vp9_alloc_frame(ctx
, &s
->frames
[CUR_FRAME
])) < 0)
3765 f
= s
->frames
[CUR_FRAME
].tf
.f
;
3766 f
->key_frame
= s
->keyframe
;
3767 f
->pict_type
= s
->keyframe
? AV_PICTURE_TYPE_I
: AV_PICTURE_TYPE_P
;
3768 ls_y
= f
->linesize
[0];
3769 ls_uv
=f
->linesize
[1];
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        if (s->refreshrefmask & (1 << i)) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
        } else {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
        }
        if (res < 0)
            return res;
    }
    if (s->fullrange)
        ctx->color_range = AVCOL_RANGE_JPEG;
    else
        ctx->color_range = AVCOL_RANGE_MPEG;

    switch (s->colorspace) {
    case 1: ctx->colorspace = AVCOL_SPC_BT470BG;   break;
    case 2: ctx->colorspace = AVCOL_SPC_BT709;     break;
    case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }
    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    s->pass = s->uses_2pass =
        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
    if ((res = update_block_buffers(ctx)) < 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return res;
    }
    if (s->refreshctx && s->parallelmode) {
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
                break;
        }
        s->prob_ctx[s->framectxid].p = s->prob.p;
        ff_thread_finish_setup(ctx);
    } else if (!s->refreshctx) {
        ff_thread_finish_setup(ctx);
    }
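
    // With frame threading and backward adaptation enabled, decoding runs
    // twice: pass 1 parses the bitstream into the block buffers
    // (decode_sb), pass 2 reconstructs the pixels from them
    // (decode_sb_mem). Otherwise a single pass (s->pass == 0) does both.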
    do {
        yoff = uvoff = 0;
        s->b = s->b_base;
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];
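
        // Tile framing: every tile except the very last one is prefixed
        // with its compressed size as a 32-bit big-endian value; the last
        // tile spans the remainder of the packet. One range decoder per
        // tile column is kept in s->c_b[] so that superblock rows can be
        // decoded across tiles.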
        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);

            if (s->pass != 2) {
                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->tiling.tile_cols - 1 &&
                        tile_row == s->tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size) {
                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                        return AVERROR_INVALIDDATA;
                    }
                    data += tile_size;
                    size -= tile_size;
                }
            }
            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
                struct VP9Filter *lflvl_ptr = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    if (s->pass != 2) {
                        memset(s->left_partition_ctx, 0, 8);
                        memset(s->left_skip_ctx, 0, 8);
                        if (s->keyframe || s->intraonly) {
                            memset(s->left_mode_ctx, DC_PRED, 16);
                        } else {
                            memset(s->left_mode_ctx, NEARESTMV, 8);
                        }
                        memset(s->left_y_nnz_ctx, 0, 16);
                        memset(s->left_uv_nnz_ctx, 0, 16);
                        memset(s->left_segpred_ctx, 0, 8);

                        memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                    }
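
                    // decode_sb()/decode_sb_mem() recurse down the
                    // partition tree, starting from one 64x64 superblock
                    // per iteration of this loop.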
                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                        if (s->pass != 1)
                            memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                        if (s->pass == 2) {
                            decode_sb_mem(ctx, row, col, lflvl_ptr,
                                          yoff2, uvoff2, BL_64X64);
                        } else {
                            decode_sb(ctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        }
                    }
                    if (s->pass != 2)
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
                }

                if (s->pass == 1)
                    continue;
                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff + 63 * ls_y,
                           8 * s->cols);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff + 31 * ls_uv,
                           4 * s->cols);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff + 31 * ls_uv,
                           4 * s->cols);
                }
                // loopfilter one row
                if (s->filter.level) {
                    yoff2 = yoff;
                    uvoff2 = uvoff;
                    lflvl_ptr = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
                    }
                }

                // FIXME maybe we can make this more finegrained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
            }
        }
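
        // Backward probability adaptation happens at the end of the first
        // (or only) pass, unless the frame requested parallel mode, in
        // which case the context was already refreshed above.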
        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            adapt_probs(s);
            ff_thread_finish_setup(ctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
    }

    if (!s->invisible) {
        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
            return res;
        *got_frame = 1;
    }

    return 0;
}
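
// Drop all internal state when the decoder is flushed (e.g. on seek), so
// that no stale frame or reference buffers survive into the next frame.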
static void vp9_decode_flush(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++)
        vp9_unref_frame(ctx, &s->frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(ctx, &s->refs[i]);
}
static int init_frames(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 2; i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
static av_cold int vp9_decode_init(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    ctx->internal->allocate_progress = 1;
    ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    ff_vp9dsp_init(&s->dsp);
    ff_videodsp_init(&s->vdsp, 8);
    s->filter.sharpness = -1;

    return init_frames(ctx);
}
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
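
// Frame threading: copy the decoding state a future frame needs (frame and
// reference buffers, probability contexts, loopfilter deltas, segmentation
// features) from the thread that just finished its setup phase.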
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, res;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // detect size changes in other threads
    if (s->intra_pred_data[0] &&
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
        free_buffers(s);
        s->intra_pred_data[0] = NULL;
    }

    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
                return res;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->next_refs[i].f->data[0]) {
            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
                return res;
        }
    }

    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->uses_2pass = ssrc->uses_2pass;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    if (ssrc->segmentation.enabled) {
        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
               sizeof(s->segmentation.feat));
    }

    return 0;
}
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
};