| 1 | /* |
| 2 | * VP7/VP8 compatible video decoder |
| 3 | * |
| 4 | * Copyright (C) 2010 David Conrad |
| 5 | * Copyright (C) 2010 Ronald S. Bultje |
| 6 | * Copyright (C) 2010 Fiona Glaser |
| 7 | * Copyright (C) 2012 Daniel Kang |
| 8 | * Copyright (C) 2014 Peter Ross |
| 9 | * |
| 10 | * This file is part of FFmpeg. |
| 11 | * |
| 12 | * FFmpeg is free software; you can redistribute it and/or |
| 13 | * modify it under the terms of the GNU Lesser General Public |
| 14 | * License as published by the Free Software Foundation; either |
| 15 | * version 2.1 of the License, or (at your option) any later version. |
| 16 | * |
| 17 | * FFmpeg is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 20 | * Lesser General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Lesser General Public |
| 23 | * License along with FFmpeg; if not, write to the Free Software |
| 24 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 25 | */ |
| 26 | |
| 27 | #include "libavutil/imgutils.h" |
| 28 | |
| 29 | #include "avcodec.h" |
| 30 | #include "internal.h" |
| 31 | #include "rectangle.h" |
| 32 | #include "thread.h" |
| 33 | #include "vp8.h" |
| 34 | #include "vp8data.h" |
| 35 | |
| 36 | #if ARCH_ARM |
| 37 | # include "arm/vp8.h" |
| 38 | #endif |
| 39 | |
| 40 | #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER |
| 41 | #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f) |
| 42 | #elif CONFIG_VP7_DECODER |
| 43 | #define VPX(vp7, f) vp7_ ## f |
| 44 | #else // CONFIG_VP8_DECODER |
| 45 | #define VPX(vp7, f) vp8_ ## f |
| 46 | #endif |
| 47 | |
| 48 | static void free_buffers(VP8Context *s) |
| 49 | { |
| 50 | int i; |
| 51 | if (s->thread_data) |
| 52 | for (i = 0; i < MAX_THREADS; i++) { |
| 53 | #if HAVE_THREADS |
| 54 | pthread_cond_destroy(&s->thread_data[i].cond); |
| 55 | pthread_mutex_destroy(&s->thread_data[i].lock); |
| 56 | #endif |
| 57 | av_freep(&s->thread_data[i].filter_strength); |
| 58 | } |
| 59 | av_freep(&s->thread_data); |
| 60 | av_freep(&s->macroblocks_base); |
| 61 | av_freep(&s->intra4x4_pred_mode_top); |
| 62 | av_freep(&s->top_nnz); |
| 63 | av_freep(&s->top_border); |
| 64 | |
| 65 | s->macroblocks = NULL; |
| 66 | } |
| 67 | |
| 68 | static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) |
| 69 | { |
| 70 | int ret; |
| 71 | if ((ret = ff_thread_get_buffer(s->avctx, &f->tf, |
| 72 | ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0) |
| 73 | return ret; |
| 74 | if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) { |
| 75 | ff_thread_release_buffer(s->avctx, &f->tf); |
| 76 | return AVERROR(ENOMEM); |
| 77 | } |
| 78 | return 0; |
| 79 | } |
| 80 | |
/* Drop a frame's segmentation map and return its picture buffer. */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
| 86 | |
| 87 | #if CONFIG_VP8_DECODER |
/**
 * Make dst a new reference to src's picture buffer and segmentation map.
 * Whatever dst previously held is released first; on failure dst is left
 * empty. Returns 0 on success, a negative AVERROR code on failure.
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        /* undo the tf reference taken above */
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
| 104 | #endif /* CONFIG_VP8_DECODER */ |
| 105 | |
| 106 | static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) |
| 107 | { |
| 108 | VP8Context *s = avctx->priv_data; |
| 109 | int i; |
| 110 | |
| 111 | for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) |
| 112 | vp8_release_frame(s, &s->frames[i]); |
| 113 | memset(s->framep, 0, sizeof(s->framep)); |
| 114 | |
| 115 | if (free_mem) |
| 116 | free_buffers(s); |
| 117 | } |
| 118 | |
/* AVCodec.flush callback: drop all references but keep allocations. */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
| 123 | |
/**
 * Return a frame slot that is not currently referenced as CURRENT, PREVIOUS,
 * GOLDEN or GOLDEN2/altref. With 5 slots and at most 4 live references a free
 * slot must always exist; not finding one is an internal logic error, hence
 * the abort(). Any stale buffer still attached to the slot is released.
 */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    // slot may still hold an old (now unreferenced) picture; recycle it
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}
| 147 | |
| 148 | static av_always_inline |
| 149 | int update_dimensions(VP8Context *s, int width, int height, int is_vp7) |
| 150 | { |
| 151 | AVCodecContext *avctx = s->avctx; |
| 152 | int i, ret; |
| 153 | |
| 154 | if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || |
| 155 | height != s->avctx->height) { |
| 156 | vp8_decode_flush_impl(s->avctx, 1); |
| 157 | |
| 158 | ret = ff_set_dimensions(s->avctx, width, height); |
| 159 | if (ret < 0) |
| 160 | return ret; |
| 161 | } |
| 162 | |
| 163 | s->mb_width = (s->avctx->coded_width + 15) / 16; |
| 164 | s->mb_height = (s->avctx->coded_height + 15) / 16; |
| 165 | |
| 166 | s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE && |
| 167 | FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1; |
| 168 | if (!s->mb_layout) { // Frame threading and one thread |
| 169 | s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * |
| 170 | sizeof(*s->macroblocks)); |
| 171 | s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); |
| 172 | } else // Sliced threading |
| 173 | s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * |
| 174 | sizeof(*s->macroblocks)); |
| 175 | s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz)); |
| 176 | s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); |
| 177 | s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); |
| 178 | |
| 179 | for (i = 0; i < MAX_THREADS; i++) { |
| 180 | s->thread_data[i].filter_strength = |
| 181 | av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength)); |
| 182 | #if HAVE_THREADS |
| 183 | pthread_mutex_init(&s->thread_data[i].lock, NULL); |
| 184 | pthread_cond_init(&s->thread_data[i].cond, NULL); |
| 185 | #endif |
| 186 | } |
| 187 | |
| 188 | if (!s->macroblocks_base || !s->top_nnz || !s->top_border || |
| 189 | (!s->intra4x4_pred_mode_top && !s->mb_layout)) |
| 190 | return AVERROR(ENOMEM); |
| 191 | |
| 192 | s->macroblocks = s->macroblocks_base + 1; |
| 193 | |
| 194 | return 0; |
| 195 | } |
| 196 | |
/* VP7 entry point; is_vp7 is compile-time so update_dimensions specializes. */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}
| 201 | |
/* VP8 entry point; is_vp7 is compile-time so update_dimensions specializes. */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
| 206 | |
| 207 | |
/**
 * Parse the segmentation header: per-segment quantizer and loop-filter
 * values plus the segment-id tree probabilities.
 * The range-coder reads must stay in exactly this order.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        // absolute_vals: values replace the frame baseline instead of
        // being added to it
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; // 255 = default
}
| 228 | |
/**
 * Parse loop-filter delta updates: one signed 6-bit magnitude per reference
 * frame type and per macroblock mode. Each value is coded as an optional
 * magnitude followed by a sign flag; bit-read order must not change.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            // sign bit follows the magnitude
            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
| 252 | |
/**
 * Read the coefficient-partition layout and initialize one range decoder per
 * partition. The sizes of the first num_coeff_partitions-1 partitions are
 * stored as 24-bit little-endian values in front of the data; the last
 * partition gets whatever remains.
 *
 * @return 0 on success, -1 if the declared sizes overrun the buffer
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    // skip past the size table itself
    buf += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf += size;
        buf_size -= size;
    }
    // final partition: implicit size = remaining bytes
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
| 278 | |
/**
 * Parse VP7 quantizer indices and fill qmat[0] via the VP7 lookup tables.
 * Each index after yac defaults to yac_qi unless an override flag is coded;
 * the reads are sequential bitstream operations and must keep this order.
 */
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi = vp8_rac_get_uint(c, 7);
    int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    // chroma DC is capped at 132, mirroring the VP8 path below
    s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
}
| 297 | |
/**
 * Parse the VP8 quantizer header (a base yac index plus signed deltas) and
 * fill one qmat entry per segment. Without segmentation only qmat[0] is
 * meaningful but all four are computed from the same base.
 */
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi = vp8_rac_get_uint(c, 7);
    int ydc_delta = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            // relative mode: segment value is a delta on the frame base
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        // indices are clamped to [0,127] before table lookup
        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
        s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        // spec-mandated floor/ceiling on the second-order and chroma DC values
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
| 330 | |
| 331 | /** |
| 332 | * Determine which buffers golden and altref should be updated with after this frame. |
| 333 | * The spec isn't clear here, so I'm going by my understanding of what libvpx does |
| 334 | * |
| 335 | * Intra frames update all 3 references |
| 336 | * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set |
| 337 | * If the update (golden|altref) flag is set, it's updated with the current frame |
| 338 | * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. |
| 339 | * If the flag is not set, the number read means: |
| 340 | * 0: no update |
| 341 | * 1: VP56_FRAME_PREVIOUS |
| 342 | * 2: update golden with altref, or update altref with golden |
| 343 | */ |
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    // per the comment above: 0 = no update, 1 = copy from previous,
    // 2 = swap golden <-> altref
    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
| 359 | |
| 360 | static void vp78_reset_probability_tables(VP8Context *s) |
| 361 | { |
| 362 | int i, j; |
| 363 | for (i = 0; i < 4; i++) |
| 364 | for (j = 0; j < 16; j++) |
| 365 | memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], |
| 366 | sizeof(s->prob->token[i][j])); |
| 367 | } |
| 368 | |
/**
 * Parse DCT token probability updates (13.3). Updates are coded per
 * coefficient band; each new probability is fanned out to every coefficient
 * position that maps to that band.
 */
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        // apply to every position belonging to band j
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
| 384 | |
| 385 | #define VP7_MVC_SIZE 17 |
| 386 | #define VP8_MVC_SIZE 19 |
| 387 | |
/**
 * Parse interframe probability updates for 16x16 luma prediction modes,
 * 8x8 chroma prediction modes and motion vector components.
 * mvc_size is VP7_MVC_SIZE (17) or VP8_MVC_SIZE (19).
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
| 407 | |
/* Parse golden/altref update flags for a VP8 interframe and resolve them
 * (via ref_to_update) into the frame each reference will be updated from. */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    // both flags must be read before ref_to_update() consumes further bits
    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
| 418 | |
| 419 | static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height) |
| 420 | { |
| 421 | int i, j; |
| 422 | |
| 423 | for (j = 1; j < 3; j++) { |
| 424 | for (i = 0; i < height / 2; i++) |
| 425 | memcpy(dst->data[j] + i * dst->linesize[j], |
| 426 | src->data[j] + i * src->linesize[j], width / 2); |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | static void fade(uint8_t *dst, int dst_linesize, |
| 431 | const uint8_t *src, int src_linesize, |
| 432 | int width, int height, |
| 433 | int alpha, int beta) |
| 434 | { |
| 435 | int i, j; |
| 436 | for (j = 0; j < height; j++) { |
| 437 | for (i = 0; i < width; i++) { |
| 438 | uint8_t y = src[j * src_linesize + i]; |
| 439 | dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha); |
| 440 | } |
| 441 | } |
| 442 | } |
| 443 | |
/**
 * Parse the VP7 fade parameters (signed 8-bit alpha/beta) and, for
 * interframes with a non-trivial fade, apply it to the previous frame's
 * luma before it is used as a reference.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        // default: fade the previous frame in place
        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            // chroma is unaffected by the fade, just copy it over
            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
| 482 | |
/**
 * Parse the complete VP7 frame header (sections labelled A..J below) and set
 * up all per-frame decoder state: dimensions, quantizers, features, fading,
 * loop filter, scan order and probability updates.
 * All range-coder reads are strictly ordered; do not reorder statements.
 *
 * @param buf      start of the frame data
 * @param buf_size size of the frame data in bytes
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe = !(buf[0] & 1);
    s->invisible = 0;
    part1_size = AV_RL24(buf) >> 4;

    // the uncompressed header is 4 bytes for profile 0, 3 for profile 1
    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        // keyframes reset all references and probability state to defaults
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            // value width is profile/feature dependent; 0 means no values
            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    // VP7 has no segmentation or loop-filter deltas
    s->segmentation.enabled = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled = 0;

    // VP7 always has exactly one coefficient partition
    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last = 1;
    s->update_probabilities = 1;
    s->fade_present = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        // keep a snapshot of the current probabilities for the next frame
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s ,c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    // VP7 has no macroblock-skip coding
    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
| 628 | |
/**
 * Parse the complete VP8 frame header: uncompressed chunk (frame tag, start
 * code, dimensions), then the compressed header (segmentation, loop filter,
 * partitions, quantizers, reference updates, probability updates).
 * All range-coder reads are strictly ordered; do not reorder statements.
 *
 * @param buf      start of the frame data
 * @param buf_size size of the frame data in bytes
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe = !(buf[0] & 1);
    s->profile = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size = AV_RL24(buf) >> 5;
    buf += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    // keyframes carry a 7-byte start-code/dimension block after the tag
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        // keyframes reset all references and probability state to defaults
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    // lf deltas have a separate "enabled" and "update now" flag
    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}
| 749 | |
| 750 | static av_always_inline |
| 751 | void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) |
| 752 | { |
| 753 | dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x); |
| 754 | dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y); |
| 755 | } |
| 756 | |
| 757 | /** |
| 758 | * Motion vector coding, 17.1. |
| 759 | */ |
/**
 * Decode one motion vector component (x or y) from the probability set p.
 * Large magnitudes are coded bit-by-bit (VP7 uses 8 magnitude bits, VP8 10),
 * small ones via a fixed binary tree; a trailing sign flag negates the value.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        // "long vector": explicit bits, low 3 ascending then high bits descending
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // bit 3 is implicit unless all high bits are zero
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x += 2 * bit;
        x += vp56_rac_get_prob(c, *ps);
    }

    // p[1] is the sign probability; zero has no sign bit
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
| 787 | |
/* VP7 wrapper: 8-bit magnitude range. */
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}
| 792 | |
/* VP8 wrapper: 10-bit magnitude range. */
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
| 797 | |
| 798 | static av_always_inline |
| 799 | const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7) |
| 800 | { |
| 801 | if (is_vp7) |
| 802 | return vp7_submv_prob; |
| 803 | |
| 804 | if (left == top) |
| 805 | return vp8_submv_prob[4 - !!left]; |
| 806 | if (!top) |
| 807 | return vp8_submv_prob[2]; |
| 808 | return vp8_submv_prob[1 - !!left]; |
| 809 | } |
| 810 | |
| 811 | /** |
| 812 | * Split motion vector prediction, 16.4. |
| 813 | * @returns the number of motion vectors parsed (2, 4 or 16) |
| 814 | */ |
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv = mb->bmv;

    // locate the macroblock above: with the compact layout it sits two
    // entries ahead; with the full 2-D layout one row (mb_width+1) back
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    // parse the partitioning mode from the mbsplit tree
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        // neighbor sub-MVs: left column borrows from the left MB,
        // top row borrows from the MB above; otherwise use our own
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        // sub-MV tree: NEW4x4 / ZERO4x4 / TOP4x4 / LEFT4x4
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
| 886 | |
| 887 | /** |
| 888 | * The vp7 reference decoder uses a padding macroblock column (added to right |
| 889 | * edge of the frame) to guard against illegal macroblock offsets. The |
| 890 | * algorithm has bugs that permit offsets to straddle the padding column. |
| 891 | * This function replicates those bugs. |
| 892 | * |
| 893 | * @param[out] edge_x macroblock x address |
| 894 | * @param[out] edge_y macroblock y address |
| 895 | * |
| 896 | * @return macroblock offset legal (boolean) |
| 897 | */ |
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    /* the virtual frame is one macroblock wider: a padding column on the
     * right guards (imperfectly) against illegal offsets */
    int stride = mb_width + 1;
    int off    = (mb_y + yoffset) * stride + mb_x + xoffset;

    /* reject offsets before the boundary or landing in the padding column;
     * note an offset may still wrap across rows, replicating the reference
     * decoder's bug */
    if (off < boundary || off % stride == stride - 1)
        return 0;

    *edge_y = off / stride;
    *edge_x = off % stride;
    return 1;
}
| 910 | |
| 911 | static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock) |
| 912 | { |
| 913 | return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0]; |
| 914 | } |
| 915 | |
/**
 * Decode the motion vector (and possible split-MV sub-vectors) for one
 * VP7 inter macroblock.
 *
 * Scans the fixed list of VP7_MV_PRED_COUNT candidate neighbour positions
 * (vp7_mv_pred), accumulating scores for the zero / nearest / near
 * candidates, then reads the mode decisions and any MV residual from the
 * range coder. Fills in mb->mode, mb->mv, mb->bmv[] and mb->partitioning.
 *
 * @param s      decoder context
 * @param mb     macroblock to fill in
 * @param mb_x   horizontal macroblock position
 * @param mb_y   vertical macroblock position
 * @param layout copy of s->mb_layout, passed so it is inlined as a constant
 */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Score the candidates: each predictor position that holds a nonzero MV
     * votes for "nearest" (first distinct MV seen) or "near" (second
     * distinct MV); zero MVs and illegal positions vote for "zero". */
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        /* a third distinct MV contributes no score at all */
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    /* Mode tree: zero -> nearest -> near -> new MV (possibly split);
     * the scores above select the probability contexts. */
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* base MV for the residual: the better-scoring candidate,
                 * or zero when the zero candidate outscores it */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* mb->mv becomes the last sub-vector decoded */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
| 1006 | |
/**
 * Decode the motion vector for one VP8 inter macroblock, predicting from
 * the top, left and top-left neighbour macroblocks (RFC 6386 MV decoding).
 *
 * @param s      decoder context
 * @param mb     macroblock to fill in (mode, mv, bmv)
 * @param mb_x   horizontal macroblock position
 * @param mb_y   vertical macroblock position
 * @param layout copy of s->mb_layout, passed so it is inlined as a constant
 */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
                                  mb - 1 /* left */,
                                  0 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    /* locate the top and top-left neighbours for the two MB layouts */
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left: each intra-coded or zero-MV
     * neighbour votes for "zero"; distinct nonzero MVs (sign-corrected for
     * mismatching reference-frame sign bias) fill near_mv[1..] and collect
     * weights (top/left count double the top-left). */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* re-use cnt[CNT_SPLITMV] as the split-mode context: how
                 * many neighbours used SPLITMV (top/left weighted double) */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* mb->mv becomes the last sub-vector decoded */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
| 1106 | |
/**
 * Decode the 16 per-subblock intra prediction modes of an I4x4 macroblock
 * into mb->intra4x4_pred_mode_mb.
 *
 * On keyframes each mode is context-coded from the modes of the blocks
 * above and to the left; on inter frames a single fixed probability table
 * is used and no context is maintained.
 *
 * @param s        decoder context
 * @param c        range coder to read from
 * @param mb       macroblock being decoded
 * @param mb_x     horizontal macroblock position (selects the top-mode slice)
 * @param keyframe nonzero if the current frame is a keyframe
 * @param layout   copy of s->mb_layout, passed so it is inlined as a constant
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        /* in-macroblock layout: pull the "top" mode row from the MB above */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                /* probability context from the above and left modes */
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* decoded mode becomes the context for the next blocks */
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        /* inter frames: fixed probabilities, raster order, no context */
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
| 1141 | |
| 1142 | static av_always_inline |
| 1143 | void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
| 1144 | uint8_t *segment, uint8_t *ref, int layout, int is_vp7) |
| 1145 | { |
| 1146 | VP56RangeCoder *c = &s->c; |
| 1147 | const char *vp7_feature_name[] = { "q-index", |
| 1148 | "lf-delta", |
| 1149 | "partial-golden-update", |
| 1150 | "blit-pitch" }; |
| 1151 | if (is_vp7) { |
| 1152 | int i; |
| 1153 | *segment = 0; |
| 1154 | for (i = 0; i < 4; i++) { |
| 1155 | if (s->feature_enabled[i]) { |
| 1156 | if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) { |
| 1157 | int index = vp8_rac_get_tree(c, vp7_feature_index_tree, |
| 1158 | s->feature_index_prob[i]); |
| 1159 | av_log(s->avctx, AV_LOG_WARNING, |
| 1160 | "Feature %s present in macroblock (value 0x%x)\n", |
| 1161 | vp7_feature_name[i], s->feature_value[i][index]); |
| 1162 | } |
| 1163 | } |
| 1164 | } |
| 1165 | } else if (s->segmentation.update_map) { |
| 1166 | int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]); |
| 1167 | *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; |
| 1168 | } else if (s->segmentation.enabled) |
| 1169 | *segment = ref ? *ref : *segment; |
| 1170 | mb->segment = *segment; |
| 1171 | |
| 1172 | mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; |
| 1173 | |
| 1174 | if (s->keyframe) { |
| 1175 | mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, |
| 1176 | vp8_pred16x16_prob_intra); |
| 1177 | |
| 1178 | if (mb->mode == MODE_I4x4) { |
| 1179 | decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); |
| 1180 | } else { |
| 1181 | const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode |
| 1182 | : vp8_pred4x4_mode)[mb->mode] * 0x01010101u; |
| 1183 | if (s->mb_layout) |
| 1184 | AV_WN32A(mb->intra4x4_pred_mode_top, modes); |
| 1185 | else |
| 1186 | AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); |
| 1187 | AV_WN32A(s->intra4x4_pred_mode_left, modes); |
| 1188 | } |
| 1189 | |
| 1190 | mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, |
| 1191 | vp8_pred8x8c_prob_intra); |
| 1192 | mb->ref_frame = VP56_FRAME_CURRENT; |
| 1193 | } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { |
| 1194 | // inter MB, 16.2 |
| 1195 | if (vp56_rac_get_prob_branchy(c, s->prob->last)) |
| 1196 | mb->ref_frame = |
| 1197 | (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */ |
| 1198 | : VP56_FRAME_GOLDEN; |
| 1199 | else |
| 1200 | mb->ref_frame = VP56_FRAME_PREVIOUS; |
| 1201 | s->ref_count[mb->ref_frame - 1]++; |
| 1202 | |
| 1203 | // motion vectors, 16.3 |
| 1204 | if (is_vp7) |
| 1205 | vp7_decode_mvs(s, mb, mb_x, mb_y, layout); |
| 1206 | else |
| 1207 | vp8_decode_mvs(s, mb, mb_x, mb_y, layout); |
| 1208 | } else { |
| 1209 | // intra MB, 16.1 |
| 1210 | mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); |
| 1211 | |
| 1212 | if (mb->mode == MODE_I4x4) |
| 1213 | decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); |
| 1214 | |
| 1215 | mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, |
| 1216 | s->prob->pred8x8c); |
| 1217 | mb->ref_frame = VP56_FRAME_CURRENT; |
| 1218 | mb->partitioning = VP8_SPLITMVMODE_NONE; |
| 1219 | AV_ZERO32(&mb->bmv[0]); |
| 1220 | } |
| 1221 | } |
| 1222 | |
| 1223 | /** |
| 1224 | * @param r arithmetic bitstream reader context |
| 1225 | * @param block destination for block coefficients |
| 1226 | * @param probs probabilities to use when reading trees from the bitstream |
| 1227 | * @param i initial coeff index, 0 unless a separate DC block is coded |
| 1228 | * @param qmul array holding the dc/ac dequant factor at position 0/1 |
| 1229 | * |
| 1230 | * @return 0 if no coeffs were decoded |
| 1231 | * otherwise, the index of the last coeff decoded plus one |
| 1232 | */ |
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    /* work on a local copy of the range coder so the hot loop can keep its
     * state in registers; written back once on exit */
    VP56RangeCoder c = *r;
    goto skip_eob; // the caller has already consumed the first EOB check
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            /* VP7 allows an EOB token right after a zero, VP8 does not */
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    /* category base value 3 + 8<<cat, plus extra bits */
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        /* sign bit, then dequantize: qmul[0] applies to DC (i == 0),
         * qmul[1] to all AC coefficients */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
| 1292 | |
| 1293 | static av_always_inline |
| 1294 | int inter_predict_dc(int16_t block[16], int16_t pred[2]) |
| 1295 | { |
| 1296 | int16_t dc = block[0]; |
| 1297 | int ret = 0; |
| 1298 | |
| 1299 | if (pred[1] > 3) { |
| 1300 | dc += pred[0]; |
| 1301 | ret = 1; |
| 1302 | } |
| 1303 | |
| 1304 | if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { |
| 1305 | block[0] = pred[0] = dc; |
| 1306 | pred[1] = 0; |
| 1307 | } else { |
| 1308 | if (pred[0] == dc) |
| 1309 | pred[1]++; |
| 1310 | block[0] = pred[0] = dc; |
| 1311 | } |
| 1312 | |
| 1313 | return ret; |
| 1314 | } |
| 1315 | |
/* VP7 entry point: non-inlined wrapper that constant-propagates vp7 = 1
 * (and the frame-specific scan order) into the shared coefficient decoder */
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}
| 1326 | |
/* VP8 entry point with fixed zigzag scan; the #ifndef lets an arch-specific
 * header (e.g. arm/vp8.h included above) provide an optimized replacement */
#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif
| 1338 | |
| 1339 | /** |
| 1340 | * @param c arithmetic bitstream reader context |
| 1341 | * @param block destination for block coefficients |
| 1342 | * @param probs probabilities to use when reading trees from the bitstream |
| 1343 | * @param i initial coeff index, 0 unless a separate DC block is coded |
| 1344 | * @param zero_nhood the initial prediction context for number of surrounding |
| 1345 | * all-zero blocks (only left/top, so 0-2) |
| 1346 | * @param qmul array holding the dc/ac dequant factor at position 0/1 |
| 1347 | * @param scan scan pattern (VP7 only) |
| 1348 | * |
| 1349 | * @return 0 if no coeffs were decoded |
| 1350 | * otherwise, the index of the last coeff decoded plus one |
| 1351 | */ |
| 1352 | static av_always_inline |
| 1353 | int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], |
| 1354 | uint8_t probs[16][3][NUM_DCT_TOKENS - 1], |
| 1355 | int i, int zero_nhood, int16_t qmul[2], |
| 1356 | const uint8_t scan[16], int vp7) |
| 1357 | { |
| 1358 | uint8_t *token_prob = probs[i][zero_nhood]; |
| 1359 | if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
| 1360 | return 0; |
| 1361 | return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, |
| 1362 | token_prob, qmul, scan) |
| 1363 | : vp8_decode_block_coeffs_internal(c, block, probs, i, |
| 1364 | token_prob, qmul); |
| 1365 | } |
| 1366 | |
/**
 * Decode all DCT coefficients of one macroblock (luma DC, 16 luma AC
 * blocks, 8 chroma blocks) and fill the non-zero-count cache used by the
 * IDCT and the loop filter.
 *
 * @param s      decoder context
 * @param td     per-thread data (destination blocks, nnz cache)
 * @param c      range coder of the current coefficient partition
 * @param mb     macroblock being decoded
 * @param t_nnz  top non-zero context: [0..3] luma, [4..7] chroma, [8] DC
 * @param l_nnz  left non-zero context, same layout
 * @param is_vp7 1 for VP7, 0 for VP8 (compile-time constant)
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    /* all modes except I4x4 (and, on VP8, SPLITMV) code luma DC in a
     * separate WHT-transformed block */
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            /* VP7 additionally predicts inter-MB DC from previous DCs */
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* luma AC blocks then start at coefficient 1 with token ctx 0 */
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
| 1441 | |
| 1442 | static av_always_inline |
| 1443 | void backup_mb_border(uint8_t *top_border, uint8_t *src_y, |
| 1444 | uint8_t *src_cb, uint8_t *src_cr, |
| 1445 | int linesize, int uvlinesize, int simple) |
| 1446 | { |
| 1447 | AV_COPY128(top_border, src_y + 15 * linesize); |
| 1448 | if (!simple) { |
| 1449 | AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); |
| 1450 | AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); |
| 1451 | } |
| 1452 | } |
| 1453 | |
/**
 * Exchange (or copy) the pixel rows above the current macroblock with the
 * saved top-border buffer, so intra prediction reads pre-loop-filter edge
 * pixels; called again with xchg=0 after prediction to restore/copy back.
 *
 * @param xchg nonzero to swap before prediction, 0 to copy afterwards
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

/* swap or copy 8 bytes depending on the xchg flag */
#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
| 1487 | |
| 1488 | static av_always_inline |
| 1489 | int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) |
| 1490 | { |
| 1491 | if (!mb_x) |
| 1492 | return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
| 1493 | else |
| 1494 | return mb_y ? mode : LEFT_DC_PRED8x8; |
| 1495 | } |
| 1496 | |
| 1497 | static av_always_inline |
| 1498 | int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7) |
| 1499 | { |
| 1500 | if (!mb_x) |
| 1501 | return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8); |
| 1502 | else |
| 1503 | return mb_y ? mode : HOR_PRED8x8; |
| 1504 | } |
| 1505 | |
| 1506 | static av_always_inline |
| 1507 | int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7) |
| 1508 | { |
| 1509 | switch (mode) { |
| 1510 | case DC_PRED8x8: |
| 1511 | return check_dc_pred8x8_mode(mode, mb_x, mb_y); |
| 1512 | case VERT_PRED8x8: |
| 1513 | return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode; |
| 1514 | case HOR_PRED8x8: |
| 1515 | return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode; |
| 1516 | case PLANE_PRED8x8: /* TM */ |
| 1517 | return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7); |
| 1518 | } |
| 1519 | return mode; |
| 1520 | } |
| 1521 | |
| 1522 | static av_always_inline |
| 1523 | int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7) |
| 1524 | { |
| 1525 | if (!mb_x) { |
| 1526 | return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED); |
| 1527 | } else { |
| 1528 | return mb_y ? mode : HOR_VP8_PRED; |
| 1529 | } |
| 1530 | } |
| 1531 | |
| 1532 | static av_always_inline |
| 1533 | int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, |
| 1534 | int *copy_buf, int vp7) |
| 1535 | { |
| 1536 | switch (mode) { |
| 1537 | case VERT_PRED: |
| 1538 | if (!mb_x && mb_y) { |
| 1539 | *copy_buf = 1; |
| 1540 | return mode; |
| 1541 | } |
| 1542 | /* fall-through */ |
| 1543 | case DIAG_DOWN_LEFT_PRED: |
| 1544 | case VERT_LEFT_PRED: |
| 1545 | return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode; |
| 1546 | case HOR_PRED: |
| 1547 | if (!mb_y) { |
| 1548 | *copy_buf = 1; |
| 1549 | return mode; |
| 1550 | } |
| 1551 | /* fall-through */ |
| 1552 | case HOR_UP_PRED: |
| 1553 | return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode; |
| 1554 | case TM_VP8_PRED: |
| 1555 | return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7); |
| 1556 | case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions |
| 1557 | * as 16x16/8x8 DC */ |
| 1558 | case DIAG_DOWN_RIGHT_PRED: |
| 1559 | case VERT_RIGHT_PRED: |
| 1560 | case HOR_DOWN_PRED: |
| 1561 | if (!mb_y || !mb_x) |
| 1562 | *copy_buf = 1; |
| 1563 | return mode; |
| 1564 | } |
| 1565 | return mode; |
| 1566 | } |
| 1567 | |
/**
 * Perform intra prediction for one macroblock: luma as 16x16 or 16 4x4
 * sub-blocks, chroma as one 8x8 per plane; adds the IDCT residual for the
 * 4x4 case and handles frame-border mode fixups.
 */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127 otherwise, skip it if we aren't going to deblock */
    /* NOTE(review): with mb_y tested first, the `|| !mb_y` term can never
     * be reached true here; kept as-is to match the reference code */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        /* whole-macroblock 16x16 luma prediction */
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* border fill values: VP7 uses 128 everywhere, VP8 127 above and
         * 129 to the left */
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    /* predict into a stack buffer (8-byte stride) with
                     * hand-built top/left edges, then copy the 4x4 result
                     * back into the frame below */
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        /* no row above: synthesize the top edge */
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        /* no column to the left: synthesize the left edge */
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x, copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                /* add the inverse-transformed residual for this sub-block */
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    /* chroma: one 8x8 prediction per plane, same mode for U and V */
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
| 1681 | |
/* Edge-emulation lookup tables indexed by the 3-bit subpel MV fraction:
 * how many extra source pixels the interpolation filter reads on each side
 * of the block, and which mc_func row/column to use. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
| 1688 | |
| 1689 | /** |
| 1690 | * luma MC function |
| 1691 | * |
| 1692 | * @param s VP8 decoding context |
| 1693 | * @param dst target buffer for block data at block position |
| 1694 | * @param ref reference picture buffer at origin (0, 0) |
| 1695 | * @param mv motion vector (relative to block position) to get pixel data from |
| 1696 | * @param x_off horizontal position of block from origin (0, 0) |
| 1697 | * @param y_off vertical position of block from origin (0, 0) |
| 1698 | * @param block_w width of block (16, 8 or 4) |
| 1699 | * @param block_h height of block (always same as block_w) |
| 1700 | * @param width width of src/dst plane data |
| 1701 | * @param height height of src/dst plane data |
| 1702 | * @param linesize size of a single line of plane data, including padding |
| 1703 | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
| 1704 | */ |
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) { // any nonzero MV component -> real interpolation
        int src_linesize = linesize;

        /* luma MVs have 2 fractional bits; doubled to 8th-pel to index the
         * shared subpel tables / function pointers */
        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        /* wait until the reference frame has decoded all rows the filter
         * will read (including the bottom filter margin) */
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* source area leaves the frame: interpolate from a padded copy */
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        /* zero MV: plain copy, only the block rows themselves are needed */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
| 1745 | |
| 1746 | /** |
| 1747 | * chroma MC function |
| 1748 | * |
| 1749 | * @param s VP8 decoding context |
| 1750 | * @param dst1 target buffer for block data at block position (U plane) |
| 1751 | * @param dst2 target buffer for block data at block position (V plane) |
| 1752 | * @param ref reference picture buffer at origin (0, 0) |
| 1753 | * @param mv motion vector (relative to block position) to get pixel data from |
| 1754 | * @param x_off horizontal position of block from origin (0, 0) |
| 1755 | * @param y_off vertical position of block from origin (0, 0) |
| 1756 | * @param block_w width of block (16, 8 or 4) |
| 1757 | * @param block_h height of block (always same as block_w) |
| 1758 | * @param width width of src/dst plane data |
| 1759 | * @param height height of src/dst plane data |
| 1760 | * @param linesize size of a single line of plane data, including padding |
| 1761 | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
| 1762 | */ |
| 1763 | static av_always_inline |
| 1764 | void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, |
| 1765 | uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv, |
| 1766 | int x_off, int y_off, int block_w, int block_h, |
| 1767 | int width, int height, ptrdiff_t linesize, |
| 1768 | vp8_mc_func mc_func[3][3]) |
| 1769 | { |
| 1770 | uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2]; |
| 1771 | |
| 1772 | if (AV_RN32A(mv)) { |
| 1773 | int mx = mv->x & 7, mx_idx = subpel_idx[0][mx]; |
| 1774 | int my = mv->y & 7, my_idx = subpel_idx[0][my]; |
| 1775 | |
| 1776 | x_off += mv->x >> 3; |
| 1777 | y_off += mv->y >> 3; |
| 1778 | |
| 1779 | // edge emulation |
| 1780 | src1 += y_off * linesize + x_off; |
| 1781 | src2 += y_off * linesize + x_off; |
| 1782 | ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); |
| 1783 | if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || |
| 1784 | y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { |
| 1785 | s->vdsp.emulated_edge_mc(td->edge_emu_buffer, |
| 1786 | src1 - my_idx * linesize - mx_idx, |
| 1787 | EDGE_EMU_LINESIZE, linesize, |
| 1788 | block_w + subpel_idx[1][mx], |
| 1789 | block_h + subpel_idx[1][my], |
| 1790 | x_off - mx_idx, y_off - my_idx, width, height); |
| 1791 | src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; |
| 1792 | mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my); |
| 1793 | |
| 1794 | s->vdsp.emulated_edge_mc(td->edge_emu_buffer, |
| 1795 | src2 - my_idx * linesize - mx_idx, |
| 1796 | EDGE_EMU_LINESIZE, linesize, |
| 1797 | block_w + subpel_idx[1][mx], |
| 1798 | block_h + subpel_idx[1][my], |
| 1799 | x_off - mx_idx, y_off - my_idx, width, height); |
| 1800 | src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; |
| 1801 | mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); |
| 1802 | } else { |
| 1803 | mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); |
| 1804 | mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); |
| 1805 | } |
| 1806 | } else { |
| 1807 | ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0); |
| 1808 | mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); |
| 1809 | mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); |
| 1810 | } |
| 1811 | } |
| 1812 | |
| 1813 | static av_always_inline |
| 1814 | void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], |
| 1815 | ThreadFrame *ref_frame, int x_off, int y_off, |
| 1816 | int bx_off, int by_off, int block_w, int block_h, |
| 1817 | int width, int height, VP56mv *mv) |
| 1818 | { |
| 1819 | VP56mv uvmv = *mv; |
| 1820 | |
| 1821 | /* Y */ |
| 1822 | vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, |
| 1823 | ref_frame, mv, x_off + bx_off, y_off + by_off, |
| 1824 | block_w, block_h, width, height, s->linesize, |
| 1825 | s->put_pixels_tab[block_w == 8]); |
| 1826 | |
| 1827 | /* U/V */ |
| 1828 | if (s->profile == 3) { |
| 1829 | /* this block only applies VP8; it is safe to check |
| 1830 | * only the profile, as VP7 profile <= 1 */ |
| 1831 | uvmv.x &= ~7; |
| 1832 | uvmv.y &= ~7; |
| 1833 | } |
| 1834 | x_off >>= 1; |
| 1835 | y_off >>= 1; |
| 1836 | bx_off >>= 1; |
| 1837 | by_off >>= 1; |
| 1838 | width >>= 1; |
| 1839 | height >>= 1; |
| 1840 | block_w >>= 1; |
| 1841 | block_h >>= 1; |
| 1842 | vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, |
| 1843 | dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, |
| 1844 | &uvmv, x_off + bx_off, y_off + by_off, |
| 1845 | block_w, block_h, width, height, s->uvlinesize, |
| 1846 | s->put_pixels_tab[1 + (block_w == 4)]); |
| 1847 | } |
| 1848 | |
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4; // plane position of this MB
        int mx = (mb->mv.x >> 2) + x_off + 8;     // full-pel MV target (luma MVs are quarter-pel)
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        /* (mb_x & 3) * 4 staggers the prefetched luma row per MB column
         * (NOTE(review): presumably to spread cache-line pressure — confirm) */
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        /* src[2] - src[1] as the stride lets one call touch both U and V */
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
| 1870 | |
| 1871 | /** |
| 1872 | * Apply motion vectors to prediction buffer, chapter 18. |
| 1873 | */ |
| 1874 | static av_always_inline |
| 1875 | void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], |
| 1876 | VP8Macroblock *mb, int mb_x, int mb_y) |
| 1877 | { |
| 1878 | int x_off = mb_x << 4, y_off = mb_y << 4; |
| 1879 | int width = 16 * s->mb_width, height = 16 * s->mb_height; |
| 1880 | ThreadFrame *ref = &s->framep[mb->ref_frame]->tf; |
| 1881 | VP56mv *bmv = mb->bmv; |
| 1882 | |
| 1883 | switch (mb->partitioning) { |
| 1884 | case VP8_SPLITMVMODE_NONE: |
| 1885 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1886 | 0, 0, 16, 16, width, height, &mb->mv); |
| 1887 | break; |
| 1888 | case VP8_SPLITMVMODE_4x4: { |
| 1889 | int x, y; |
| 1890 | VP56mv uvmv; |
| 1891 | |
| 1892 | /* Y */ |
| 1893 | for (y = 0; y < 4; y++) { |
| 1894 | for (x = 0; x < 4; x++) { |
| 1895 | vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4, |
| 1896 | ref, &bmv[4 * y + x], |
| 1897 | 4 * x + x_off, 4 * y + y_off, 4, 4, |
| 1898 | width, height, s->linesize, |
| 1899 | s->put_pixels_tab[2]); |
| 1900 | } |
| 1901 | } |
| 1902 | |
| 1903 | /* U/V */ |
| 1904 | x_off >>= 1; |
| 1905 | y_off >>= 1; |
| 1906 | width >>= 1; |
| 1907 | height >>= 1; |
| 1908 | for (y = 0; y < 2; y++) { |
| 1909 | for (x = 0; x < 2; x++) { |
| 1910 | uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x + |
| 1911 | mb->bmv[2 * y * 4 + 2 * x + 1].x + |
| 1912 | mb->bmv[(2 * y + 1) * 4 + 2 * x ].x + |
| 1913 | mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x; |
| 1914 | uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y + |
| 1915 | mb->bmv[2 * y * 4 + 2 * x + 1].y + |
| 1916 | mb->bmv[(2 * y + 1) * 4 + 2 * x ].y + |
| 1917 | mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y; |
| 1918 | uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2; |
| 1919 | uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2; |
| 1920 | if (s->profile == 3) { |
| 1921 | uvmv.x &= ~7; |
| 1922 | uvmv.y &= ~7; |
| 1923 | } |
| 1924 | vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4, |
| 1925 | dst[2] + 4 * y * s->uvlinesize + x * 4, ref, |
| 1926 | &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4, |
| 1927 | width, height, s->uvlinesize, |
| 1928 | s->put_pixels_tab[2]); |
| 1929 | } |
| 1930 | } |
| 1931 | break; |
| 1932 | } |
| 1933 | case VP8_SPLITMVMODE_16x8: |
| 1934 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1935 | 0, 0, 16, 8, width, height, &bmv[0]); |
| 1936 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1937 | 0, 8, 16, 8, width, height, &bmv[1]); |
| 1938 | break; |
| 1939 | case VP8_SPLITMVMODE_8x16: |
| 1940 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1941 | 0, 0, 8, 16, width, height, &bmv[0]); |
| 1942 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1943 | 8, 0, 8, 16, width, height, &bmv[1]); |
| 1944 | break; |
| 1945 | case VP8_SPLITMVMODE_8x8: |
| 1946 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1947 | 0, 0, 8, 8, width, height, &bmv[0]); |
| 1948 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1949 | 8, 0, 8, 8, width, height, &bmv[1]); |
| 1950 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1951 | 0, 8, 8, 8, width, height, &bmv[2]); |
| 1952 | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
| 1953 | 8, 8, 8, 8, width, height, &bmv[3]); |
| 1954 | break; |
| 1955 | } |
| 1956 | } |
| 1957 | |
/**
 * Add the inverse-transformed residual of one macroblock to the prediction
 * in dst (luma in dst[0], chroma in dst[1]/dst[2]). Uses the cached
 * per-sub-block non-zero coefficient counts to pick the cheapest IDCT
 * variant (DC-only vs. full) for each 4x4 block.
 */
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    /* luma residual is added here only for non-I4x4 modes */
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            /* four per-block non-zero counts packed one per byte (LE) */
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    /* at least one block has more than a DC coefficient */
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            /* DC-only block: cheap DC-add path */
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8; /* next block's count */
                        if (!nnz4)  /* remaining blocks are all empty */
                            break;
                    }
                } else {
                    /* all four blocks DC-only: batched DC add */
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4) /* rest of this plane is empty */
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                /* all four chroma blocks DC-only */
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
| 2019 | |
| 2020 | static av_always_inline |
| 2021 | void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, |
| 2022 | VP8FilterStrength *f, int is_vp7) |
| 2023 | { |
| 2024 | int interior_limit, filter_level; |
| 2025 | |
| 2026 | if (s->segmentation.enabled) { |
| 2027 | filter_level = s->segmentation.filter_level[mb->segment]; |
| 2028 | if (!s->segmentation.absolute_vals) |
| 2029 | filter_level += s->filter.level; |
| 2030 | } else |
| 2031 | filter_level = s->filter.level; |
| 2032 | |
| 2033 | if (s->lf_delta.enabled) { |
| 2034 | filter_level += s->lf_delta.ref[mb->ref_frame]; |
| 2035 | filter_level += s->lf_delta.mode[mb->mode]; |
| 2036 | } |
| 2037 | |
| 2038 | filter_level = av_clip_uintp2(filter_level, 6); |
| 2039 | |
| 2040 | interior_limit = filter_level; |
| 2041 | if (s->filter.sharpness) { |
| 2042 | interior_limit >>= (s->filter.sharpness + 3) >> 2; |
| 2043 | interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); |
| 2044 | } |
| 2045 | interior_limit = FFMAX(interior_limit, 1); |
| 2046 | |
| 2047 | f->filter_level = filter_level; |
| 2048 | f->inner_limit = interior_limit; |
| 2049 | f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 || |
| 2050 | mb->mode == VP8_MVMODE_SPLIT; |
| 2051 | } |
| 2052 | |
/**
 * Apply the normal (non-simple) loop filter to one macroblock: the left
 * and top macroblock edges, plus — when inner_filter is set — the interior
 * 4-pixel block edges, on both luma and chroma. VP7 and VP8 differ in how
 * the edge limits are derived and in where the horizontal inner filter
 * runs relative to the vertical passes (see the macro invocations below).
 */
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    /* edge limits: VP7 derives them from the level alone, VP8 also
     * folds in the interior limit */
    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    /* left macroblock edge (only if a left neighbour exists) */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

/* interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma);
 * invoked with !is_vp7 here and is_vp7 at the end, so exactly one of the
 * two expansions runs, before or after the vertical passes respectively */
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
    if (cond && inner_filter) {                                               \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,          \
                                             uvlinesize, bedge_lim_uv,        \
                                             inner_limit, hev_thresh);        \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    /* top macroblock edge (only if a row above exists) */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    /* interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma) */
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
                                             dst[2] +  4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}
| 2139 | |
| 2140 | static av_always_inline |
| 2141 | void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, |
| 2142 | int mb_x, int mb_y) |
| 2143 | { |
| 2144 | int mbedge_lim, bedge_lim; |
| 2145 | int filter_level = f->filter_level; |
| 2146 | int inner_limit = f->inner_limit; |
| 2147 | int inner_filter = f->inner_filter; |
| 2148 | int linesize = s->linesize; |
| 2149 | |
| 2150 | if (!filter_level) |
| 2151 | return; |
| 2152 | |
| 2153 | bedge_lim = 2 * filter_level + inner_limit; |
| 2154 | mbedge_lim = bedge_lim + 4; |
| 2155 | |
| 2156 | if (mb_x) |
| 2157 | s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
| 2158 | if (inner_filter) { |
| 2159 | s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); |
| 2160 | s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); |
| 2161 | s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); |
| 2162 | } |
| 2163 | |
| 2164 | if (mb_y) |
| 2165 | s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
| 2166 | if (inner_filter) { |
| 2167 | s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); |
| 2168 | s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); |
| 2169 | s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); |
| 2170 | } |
| 2171 | } |
| 2172 | |
/* MV clamping margin around the frame: 16 pixels in quarter-pel units */
#define MARGIN (16 << 2)
/**
 * Decode the mode and motion-vector data for every macroblock of the frame
 * in a separate pass, ahead of pixel decoding (decode_mb_mode is called
 * with keyframe_layout = 1 here). The mv_min/mv_max window slides by 64
 * (one MB in quarter-pel units) per macroblock/row.
 */
static av_always_inline
void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                             VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* row base in the layout with one dummy column on the left */
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        /* reset left-neighbour intra prediction modes at each row start */
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                /* initialize the above-row prediction modes for row 0 */
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
| 2206 | |
/* Thin wrappers binding the av_always_inline template above to a fixed
 * codec flag, so each variant is specialized at compile time. */
static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
| 2218 | |
#if HAVE_THREADS
/* Block until thread-data `otd` has decoded past macroblock
 * (mb_x_check, mb_y_check). Positions are packed as (mb_y << 16) | mb_x so
 * a single integer compare orders them. NOTE: the expansion already ends
 * with a ';' — do not rely on these macros inside unbraced if/else. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (otd->thread_mb_pos < tmp) {                                       \
            pthread_mutex_lock(&otd->lock);                                   \
            td->wait_mb_pos = tmp;                                            \
            do {                                                              \
                if (otd->thread_mb_pos >= tmp)                                \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            td->wait_mb_pos = INT_MAX;                                        \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0);

/* Publish this thread's progress (same packed encoding) and broadcast on
 * its condition variable, but only under sliced threading with multiple
 * jobs when a neighbouring job may actually be waiting at or before the
 * new position. Relies on avctx, num_jobs, next_td and prev_td being in
 * scope at the expansion site. */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos             = (mb_y << 16) | (mb_x & 0xFFFF);                 \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1                                  \
                                         : (next_td != td &&                  \
                                            pos >= next_td->wait_mb_pos) ||   \
                                           (prev_td != td &&                  \
                                            pos >= prev_td->wait_mb_pos);     \
        td->thread_mb_pos = pos;                                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0);
#else
/* no-op fallbacks when built without thread support */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
| 2258 | |
/**
 * Decode (predict + add residual, no loop filtering) one macroblock row.
 * The row to decode is taken from td->thread_mb_pos; under sliced
 * threading this synchronizes with the jobs handling the rows above and
 * below via check_thread_pos()/update_pos().
 */
static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                     int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos >> 16; /* row is packed in the high half */
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    /* coefficient partitions are assigned to rows round-robin */
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };
    /* identify the jobs working on the neighbouring rows (self at edges) */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        /* modes/MVs were decoded in a separate pass; index the full array */
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    /* VP8 resets left-neighbour nnz each row; VP7 only at the frame start */
    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                /* thread 0 must also wait past the filter pass of the row
                 * above (positions offset by mb_width + 3, see update_pos
                 * calls in filter_mb_row) */
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        /* in the interleaved layout, decode modes/MVs along with the row */
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        /* with multiple jobs, the last thread backs up the border rows the
         * filter pass will need (with one job, filter_mb_row does it) */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
| 2379 | |
/* Codec-specific entry points: specialize the inline row decoder above
 * with a compile-time is_vp7 flag. */
static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
| 2391 | |
/**
 * Loop-filter one already-decoded macroblock row (row taken from
 * td->thread_mb_pos), backing up the bottom border rows for the row
 * below beforehand when running single-job.
 */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    /* jobs handling the neighbouring rows (self at frame edges) */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        /* wait until the row above has been filtered past mb_x + 1
         * (filter positions are offset by mb_width + 3, see update_pos
         * below) and the row below has been decoded past mb_x + 1 */
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
| 2450 | |
/* Codec-specific entry points: specialize the inline row filter above
 * with a compile-time is_vp7 flag. */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
| 2462 | |
| 2463 | static av_always_inline |
| 2464 | int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, |
| 2465 | int threadnr, int is_vp7) |
| 2466 | { |
| 2467 | VP8Context *s = avctx->priv_data; |
| 2468 | VP8ThreadData *td = &s->thread_data[jobnr]; |
| 2469 | VP8ThreadData *next_td = NULL, *prev_td = NULL; |
| 2470 | VP8Frame *curframe = s->curframe; |
| 2471 | int mb_y, num_jobs = s->num_jobs; |
| 2472 | |
| 2473 | td->thread_nr = threadnr; |
| 2474 | for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { |
| 2475 | if (mb_y >= s->mb_height) |
| 2476 | break; |
| 2477 | td->thread_mb_pos = mb_y << 16; |
| 2478 | s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); |
| 2479 | if (s->deblock_filter) |
| 2480 | s->filter_mb_row(avctx, tdata, jobnr, threadnr); |
| 2481 | update_pos(td, mb_y, INT_MAX & 0xFFFF); |
| 2482 | |
| 2483 | s->mv_min.y -= 64; |
| 2484 | s->mv_max.y -= 64; |
| 2485 | |
| 2486 | if (avctx->active_thread_type == FF_THREAD_FRAME) |
| 2487 | ff_thread_report_progress(&curframe->tf, mb_y, 0); |
| 2488 | } |
| 2489 | |
| 2490 | return 0; |
| 2491 | } |
| 2492 | |
/* Codec-specific sliced-decode entry points, specialized at compile time. */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
| 2504 | |
| 2505 | |
/**
 * Shared VP7/VP8 frame decode entry point.
 *
 * Parses the frame header, manages the reference-frame pool
 * (last/golden/altref), runs the macroblock rows via execute2() slice
 * threading, and returns the decoded picture in *data when the frame is
 * visible.
 *
 * @param data      output AVFrame (ref'ed from the internal pool)
 * @param got_frame set to 1 when a displayable frame was produced
 * @param is_vp7    compile-time constant selecting VP7 vs VP8 code paths
 * @return avpkt->size on success, negative AVERROR on failure
 */
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    /* Frame decoded on the previous call (pool swap happens at the end). */
    prev_frame = s->framep[VP56_FRAME_CURRENT];

    /* A frame is "referenced" if any of last/golden/altref will point at it. */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    /* Skip threshold mirrors the reference hierarchy: non-referenced
     * frames are cheapest to drop, keyframes the most expensive. */
    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    /* In frame-threaded mode, all setup that the next thread's context
     * copy depends on is done; let the next frame start. */
    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
               (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        if (is_vp7)
            vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
        else
            vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    /* Frame threading decodes whole frames per thread; otherwise one
     * slice job per coefficient partition, capped by thread count. */
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    }
    if (is_vp7)
        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);
    else
        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    /* Commit the reference swap computed above. */
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
| 2656 | |
| 2657 | int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, |
| 2658 | AVPacket *avpkt) |
| 2659 | { |
| 2660 | return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8); |
| 2661 | } |
| 2662 | |
| 2663 | #if CONFIG_VP7_DECODER |
| 2664 | static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, |
| 2665 | AVPacket *avpkt) |
| 2666 | { |
| 2667 | return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7); |
| 2668 | } |
| 2669 | #endif /* CONFIG_VP7_DECODER */ |
| 2670 | |
| 2671 | av_cold int ff_vp8_decode_free(AVCodecContext *avctx) |
| 2672 | { |
| 2673 | VP8Context *s = avctx->priv_data; |
| 2674 | int i; |
| 2675 | |
| 2676 | vp8_decode_flush_impl(avctx, 1); |
| 2677 | for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) |
| 2678 | av_frame_free(&s->frames[i].tf.f); |
| 2679 | |
| 2680 | return 0; |
| 2681 | } |
| 2682 | |
| 2683 | static av_cold int vp8_init_frames(VP8Context *s) |
| 2684 | { |
| 2685 | int i; |
| 2686 | for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) { |
| 2687 | s->frames[i].tf.f = av_frame_alloc(); |
| 2688 | if (!s->frames[i].tf.f) |
| 2689 | return AVERROR(ENOMEM); |
| 2690 | } |
| 2691 | return 0; |
| 2692 | } |
| 2693 | |
/**
 * Shared VP7/VP8 decoder initialization.
 *
 * Sets up DSP/prediction function tables and allocates the frame pool.
 * The generic vp78 DSP table is installed first and then partially
 * overridden by the codec-specific init, so the call order below matters.
 *
 * @param is_vp7 compile-time constant selecting VP7 vs VP8 tables
 * @return 0 on success, negative AVERROR on failure
 */
static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    /* Frame threading: let the generic code allocate progress state. */
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    /* Common VP7/VP8 DSP first; codec-specific init overrides parts of it. */
    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
| 2730 | |
| 2731 | #if CONFIG_VP7_DECODER |
| 2732 | static int vp7_decode_init(AVCodecContext *avctx) |
| 2733 | { |
| 2734 | return vp78_decode_init(avctx, IS_VP7); |
| 2735 | } |
| 2736 | #endif /* CONFIG_VP7_DECODER */ |
| 2737 | |
| 2738 | av_cold int ff_vp8_decode_init(AVCodecContext *avctx) |
| 2739 | { |
| 2740 | return vp78_decode_init(avctx, IS_VP8); |
| 2741 | } |
| 2742 | |
| 2743 | #if CONFIG_VP8_DECODER |
| 2744 | static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx) |
| 2745 | { |
| 2746 | VP8Context *s = avctx->priv_data; |
| 2747 | int ret; |
| 2748 | |
| 2749 | s->avctx = avctx; |
| 2750 | |
| 2751 | if ((ret = vp8_init_frames(s)) < 0) { |
| 2752 | ff_vp8_decode_free(avctx); |
| 2753 | return ret; |
| 2754 | } |
| 2755 | |
| 2756 | return 0; |
| 2757 | } |
| 2758 | |
| 2759 | #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL) |
| 2760 | |
/**
 * Copy decoder state from the source (previous) frame thread context to
 * the destination. Reference-frame pointers into s_src's pool are rebased
 * into s's own pool via REBASE().
 *
 * @return 0 on success, negative AVERROR on failure
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* Dimensions changed: per-size buffers are stale; free them so they
     * are reallocated at the new size on the next decode call. */
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    /* prob[1] holds the saved (pre-update) probabilities when the source
     * frame did not commit its updates; pick the right set. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* Take references on every allocated frame in the source pool,
     * slot for slot. */
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    /* Rebase the source's post-swap reference pointers into our pool. */
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
| 2794 | #endif /* CONFIG_VP8_DECODER */ |
| 2795 | |
#if CONFIG_VP7_DECODER
/* VP7 decoder registration. No threading capabilities: VP7 here supports
 * neither frame nor slice threads (no init_thread_copy/update_thread_context). */
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
| 2810 | |
#if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering plus both frame
 * and slice threading. */
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */