/*
 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
29 #define UNCHECKED_BITSTREAM_READER 1
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
/**
 * Lookup table applied by ff_h264_decode_mb_cavlc() to the value read with
 * get_ue_golomb() to obtain cbp for macroblocks that are not intra 4x4.
 * It is the "_gray" variant of the table; the 16 entries are a permutation
 * of 0..15.
 * NOTE(review): presumably selected when no chroma is decoded - confirm
 * against the caller's decode_chroma branch.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
     0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};
/**
 * Lookup table applied by ff_h264_decode_mb_cavlc() to the value read with
 * get_ue_golomb() to obtain cbp for intra 4x4 macroblocks.
 * It is the "_gray" variant of the table; the 16 entries are a permutation
 * of 0..15.
 * NOTE(review): presumably selected when no chroma is decoded - confirm
 * against the caller's decode_chroma branch.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
    15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
/*
 * Code-length ("_len") and code-word ("_bits") tables that
 * ff_h264_decode_init_vlc() hands to init_vlc() to build the CAVLC decoding
 * tables.
 *
 * NOTE(review): this listing is damaged - a number is fused onto the start of
 * each line, array declarators are split across lines, and data rows and
 * closing "};" are missing from every table below. The missing values must be
 * restored from a trusted copy of the file; they cannot be derived from what
 * is visible here.
 */
/* Code lengths for chroma_dc_coeff_token_vlc (4*5 codes); data rows missing. */
49 static const uint8_t chroma_dc_coeff_token_len
[4*5]={
/* Code words for chroma_dc_coeff_token_vlc (4*5 codes); data rows missing. */
57 static const uint8_t chroma_dc_coeff_token_bits
[4*5]={
/* Code lengths for chroma422_dc_coeff_token_vlc (4*9 codes); data rows missing. */
65 static const uint8_t chroma422_dc_coeff_token_len
[4*9]={
/* Code words for chroma422_dc_coeff_token_vlc (4*9 codes); data rows missing. */
77 static const uint8_t chroma422_dc_coeff_token_bits
[4*9]={
/*
 * Code lengths for the four coeff_token_vlc[] tables, 4*17 codes each.
 * NOTE(review): only 64 of the 68 values per table are visible, and the
 * braces separating the four sub-arrays are missing.
 */
89 static const uint8_t coeff_token_len
[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/*
 * Code words matching coeff_token_len, same layout.
 * NOTE(review): the same four values per table and the sub-array braces are
 * missing here as well.
 */
120 static const uint8_t coeff_token_bits
[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/*
 * Code lengths for total_zeros_vlc[]; row i feeds total_zeros_vlc[i] with 16
 * codes. NOTE(review): only the first 7 rows are visible.
 */
151 static const uint8_t total_zeros_len
[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
/* Code words matching total_zeros_len; only the first 7 rows are visible. */
169 static const uint8_t total_zeros_bits
[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
/* Code lengths for chroma_dc_total_zeros_vlc[] (4 codes per row); rows missing. */
187 static const uint8_t chroma_dc_total_zeros_len
[3][4]= {
/* Code words for chroma_dc_total_zeros_vlc[]; rows missing. */
193 static const uint8_t chroma_dc_total_zeros_bits
[3][4]= {
/* Code lengths for chroma422_dc_total_zeros_vlc[] (8 codes per row); only 3 of 7 rows visible. */
199 static const uint8_t chroma422_dc_total_zeros_len
[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
/* Code words for chroma422_dc_total_zeros_vlc[]; only 3 of 7 rows visible. */
209 static const uint8_t chroma422_dc_total_zeros_bits
[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
/*
 * Code lengths for the run tables: rows are indexed as run_len[i] for
 * run_vlc[i], and row 6 feeds run7_vlc. Only one row is visible.
 * NOTE(review): the visible row is presumably row 6 (it has 15 entries) - confirm.
 */
219 static const uint8_t run_len
[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
/* Code words matching run_len; only one row is visible. */
229 static const uint8_t run_bits
[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
239 static VLC coeff_token_vlc
[4];
240 static VLC_TYPE coeff_token_vlc_tables
[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size
[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc
;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table
[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size
= 256;
247 static VLC chroma422_dc_coeff_token_vlc
;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table
[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size
= 8192;
251 static VLC total_zeros_vlc
[15];
252 static VLC_TYPE total_zeros_vlc_tables
[15][512][2];
253 static const int total_zeros_vlc_tables_size
= 512;
255 static VLC chroma_dc_total_zeros_vlc
[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables
[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size
= 8;
259 static VLC chroma422_dc_total_zeros_vlc
[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables
[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size
= 32;
263 static VLC run_vlc
[6];
264 static VLC_TYPE run_vlc_tables
[6][8][2];
265 static const int run_vlc_tables_size
= 8;
268 static VLC_TYPE run7_vlc_table
[96][2];
269 static const int run7_vlc_table_size
= 96;
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab
[7][1<<LEVEL_TAB_BITS
][2];
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
287 static inline int pred_non_zero_count(H264Context
*h
, int n
){
288 const int index8
= scan8
[n
];
289 const int left
= h
->non_zero_count_cache
[index8
- 1];
290 const int top
= h
->non_zero_count_cache
[index8
- 8];
293 if(i
<64) i
= (i
+1)>>1;
295 tprintf(h
->avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
300 static av_cold
void init_cavlc_level_tab(void){
304 for(suffix_length
=0; suffix_length
<7; suffix_length
++){
305 for(i
=0; i
<(1<<LEVEL_TAB_BITS
); i
++){
306 int prefix
= LEVEL_TAB_BITS
- av_log2(2*i
);
308 if(prefix
+ 1 + suffix_length
<= LEVEL_TAB_BITS
){
309 int level_code
= (prefix
<< suffix_length
) +
310 (i
>> (av_log2(i
) - suffix_length
)) - (1 << suffix_length
);
311 int mask
= -(level_code
&1);
312 level_code
= (((2 + level_code
) >> 1) ^ mask
) - mask
;
313 cavlc_level_tab
[suffix_length
][i
][0]= level_code
;
314 cavlc_level_tab
[suffix_length
][i
][1]= prefix
+ 1 + suffix_length
;
315 }else if(prefix
+ 1 <= LEVEL_TAB_BITS
){
316 cavlc_level_tab
[suffix_length
][i
][0]= prefix
+100;
317 cavlc_level_tab
[suffix_length
][i
][1]= prefix
+ 1;
319 cavlc_level_tab
[suffix_length
][i
][0]= LEVEL_TAB_BITS
+100;
320 cavlc_level_tab
[suffix_length
][i
][1]= LEVEL_TAB_BITS
;
326 av_cold
void ff_h264_decode_init_vlc(void){
334 chroma_dc_coeff_token_vlc
.table
= chroma_dc_coeff_token_vlc_table
;
335 chroma_dc_coeff_token_vlc
.table_allocated
= chroma_dc_coeff_token_vlc_table_size
;
336 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
337 &chroma_dc_coeff_token_len
[0], 1, 1,
338 &chroma_dc_coeff_token_bits
[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC
);
341 chroma422_dc_coeff_token_vlc
.table
= chroma422_dc_coeff_token_vlc_table
;
342 chroma422_dc_coeff_token_vlc
.table_allocated
= chroma422_dc_coeff_token_vlc_table_size
;
343 init_vlc(&chroma422_dc_coeff_token_vlc
, CHROMA422_DC_COEFF_TOKEN_VLC_BITS
, 4*9,
344 &chroma422_dc_coeff_token_len
[0], 1, 1,
345 &chroma422_dc_coeff_token_bits
[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC
);
350 coeff_token_vlc
[i
].table
= coeff_token_vlc_tables
+offset
;
351 coeff_token_vlc
[i
].table_allocated
= coeff_token_vlc_tables_size
[i
];
352 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
353 &coeff_token_len
[i
][0], 1, 1,
354 &coeff_token_bits
[i
][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC
);
356 offset
+= coeff_token_vlc_tables_size
[i
];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
363 av_assert0(offset
== FF_ARRAY_ELEMS(coeff_token_vlc_tables
));
366 chroma_dc_total_zeros_vlc
[i
].table
= chroma_dc_total_zeros_vlc_tables
[i
];
367 chroma_dc_total_zeros_vlc
[i
].table_allocated
= chroma_dc_total_zeros_vlc_tables_size
;
368 init_vlc(&chroma_dc_total_zeros_vlc
[i
],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
370 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
371 &chroma_dc_total_zeros_bits
[i
][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC
);
376 chroma422_dc_total_zeros_vlc
[i
].table
= chroma422_dc_total_zeros_vlc_tables
[i
];
377 chroma422_dc_total_zeros_vlc
[i
].table_allocated
= chroma422_dc_total_zeros_vlc_tables_size
;
378 init_vlc(&chroma422_dc_total_zeros_vlc
[i
],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS
, 8,
380 &chroma422_dc_total_zeros_len
[i
][0], 1, 1,
381 &chroma422_dc_total_zeros_bits
[i
][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC
);
386 total_zeros_vlc
[i
].table
= total_zeros_vlc_tables
[i
];
387 total_zeros_vlc
[i
].table_allocated
= total_zeros_vlc_tables_size
;
388 init_vlc(&total_zeros_vlc
[i
],
389 TOTAL_ZEROS_VLC_BITS
, 16,
390 &total_zeros_len
[i
][0], 1, 1,
391 &total_zeros_bits
[i
][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC
);
396 run_vlc
[i
].table
= run_vlc_tables
[i
];
397 run_vlc
[i
].table_allocated
= run_vlc_tables_size
;
398 init_vlc(&run_vlc
[i
],
400 &run_len
[i
][0], 1, 1,
401 &run_bits
[i
][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC
);
404 run7_vlc
.table
= run7_vlc_table
,
405 run7_vlc
.table_allocated
= run7_vlc_table_size
;
406 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
407 &run_len
[6][0], 1, 1,
408 &run_bits
[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC
);
411 init_cavlc_level_tab();
418 static inline int get_level_prefix(GetBitContext
*gb
){
423 UPDATE_CACHE(re
, gb
);
424 buf
=GET_CACHE(re
, gb
);
426 log
= 32 - av_log2(buf
);
428 print_bin(buf
>>(32-log
), log
);
429 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
432 LAST_SKIP_BITS(re
, gb
, log
);
433 CLOSE_READER(re
, gb
);
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scantable
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
445 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, int16_t *block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
446 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448 int zeros_left
, coeff_token
, total_coeff
, i
, trailing_ones
, run_before
;
450 //FIXME put trailing_onex into the context
454 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
456 coeff_token
= get_vlc2(gb
, chroma422_dc_coeff_token_vlc
.table
, CHROMA422_DC_COEFF_TOKEN_VLC_BITS
, 1);
457 total_coeff
= coeff_token
>>2;
459 if(n
>= LUMA_DC_BLOCK_INDEX
){
460 total_coeff
= pred_non_zero_count(h
, (n
- LUMA_DC_BLOCK_INDEX
)*16);
461 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
462 total_coeff
= coeff_token
>>2;
464 total_coeff
= pred_non_zero_count(h
, n
);
465 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
466 total_coeff
= coeff_token
>>2;
469 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
471 //FIXME set last_non_zero?
475 if(total_coeff
> (unsigned)max_coeff
) {
476 av_log(h
->avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", h
->mb_x
, h
->mb_y
, total_coeff
);
480 trailing_ones
= coeff_token
&3;
481 tprintf(h
->avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
482 av_assert2(total_coeff
<=16);
484 i
= show_bits(gb
, 3);
485 skip_bits(gb
, trailing_ones
);
486 level
[0] = 1-((i
&4)>>1);
487 level
[1] = 1-((i
&2) );
488 level
[2] = 1-((i
&1)<<1);
490 if(trailing_ones
<total_coeff
) {
492 int suffix_length
= total_coeff
> 10 & trailing_ones
< 3;
493 int bitsi
= show_bits(gb
, LEVEL_TAB_BITS
);
494 int level_code
= cavlc_level_tab
[suffix_length
][bitsi
][0];
496 skip_bits(gb
, cavlc_level_tab
[suffix_length
][bitsi
][1]);
497 if(level_code
>= 100){
498 prefix
= level_code
- 100;
499 if(prefix
== LEVEL_TAB_BITS
)
500 prefix
+= get_level_prefix(gb
);
502 //first coefficient has suffix_length equal to 0 or 1
503 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
505 level_code
= (prefix
<<1) + get_bits1(gb
); //part
507 level_code
= prefix
; //part
508 }else if(prefix
==14){
510 level_code
= (prefix
<<1) + get_bits1(gb
); //part
512 level_code
= prefix
+ get_bits(gb
, 4); //part
517 av_log(h
->avctx
, AV_LOG_ERROR
, "Invalid level prefix\n");
520 level_code
+= (1<<(prefix
-3))-4096;
522 level_code
+= get_bits(gb
, prefix
-3); //part
525 if(trailing_ones
< 3) level_code
+= 2;
528 mask
= -(level_code
&1);
529 level
[trailing_ones
]= (((2+level_code
)>>1) ^ mask
) - mask
;
531 level_code
+= ((level_code
>>31)|1) & -(trailing_ones
< 3);
533 suffix_length
= 1 + (level_code
+ 3U > 6U);
534 level
[trailing_ones
]= level_code
;
537 //remaining coefficients have suffix_length > 0
538 for(i
=trailing_ones
+1;i
<total_coeff
;i
++) {
539 static const unsigned int suffix_limit
[7] = {0,3,6,12,24,48,INT_MAX
};
540 int bitsi
= show_bits(gb
, LEVEL_TAB_BITS
);
541 level_code
= cavlc_level_tab
[suffix_length
][bitsi
][0];
543 skip_bits(gb
, cavlc_level_tab
[suffix_length
][bitsi
][1]);
544 if(level_code
>= 100){
545 prefix
= level_code
- 100;
546 if(prefix
== LEVEL_TAB_BITS
){
547 prefix
+= get_level_prefix(gb
);
550 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
552 level_code
= 15<<suffix_length
;
555 av_log(h
->avctx
, AV_LOG_ERROR
, "Invalid level prefix\n");
556 return AVERROR_INVALIDDATA
;
558 level_code
+= (1<<(prefix
-3))-4096;
560 level_code
+= get_bits(gb
, prefix
-3);
562 mask
= -(level_code
&1);
563 level_code
= (((2+level_code
)>>1) ^ mask
) - mask
;
565 level
[i
]= level_code
;
566 suffix_length
+= suffix_limit
[suffix_length
] + level_code
> 2U*suffix_limit
[suffix_length
];
570 if(total_coeff
== max_coeff
)
573 if (max_coeff
<= 8) {
575 zeros_left
= get_vlc2(gb
, (chroma_dc_total_zeros_vlc
-1)[total_coeff
].table
,
576 CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
578 zeros_left
= get_vlc2(gb
, (chroma422_dc_total_zeros_vlc
-1)[total_coeff
].table
,
579 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS
, 1);
581 zeros_left
= get_vlc2(gb
, (total_zeros_vlc
-1)[ total_coeff
].table
, TOTAL_ZEROS_VLC_BITS
, 1);
585 #define STORE_BLOCK(type) \
586 scantable += zeros_left + total_coeff - 1; \
587 if(n >= LUMA_DC_BLOCK_INDEX){ \
588 ((type*)block)[*scantable] = level[0]; \
589 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
591 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
593 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
594 zeros_left -= run_before; \
595 scantable -= 1 + run_before; \
596 ((type*)block)[*scantable]= level[i]; \
598 for(;i<total_coeff;i++) { \
600 ((type*)block)[*scantable]= level[i]; \
603 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
604 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
606 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
608 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
609 zeros_left -= run_before; \
610 scantable -= 1 + run_before; \
611 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
613 for(;i<total_coeff;i++) { \
615 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
619 if (h
->pixel_shift
) {
626 av_log(h
->avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", h
->mb_x
, h
->mb_y
);
633 static av_always_inline
int decode_luma_residual(H264Context
*h
, GetBitContext
*gb
, const uint8_t *scan
, const uint8_t *scan8x8
, int pixel_shift
, int mb_type
, int cbp
, int p
){
635 int qscale
= p
== 0 ? h
->qscale
: h
->chroma_qp
[p
-1];
636 if(IS_INTRA16x16(mb_type
)){
637 AV_ZERO128(h
->mb_luma_dc
[p
]+0);
638 AV_ZERO128(h
->mb_luma_dc
[p
]+8);
639 AV_ZERO128(h
->mb_luma_dc
[p
]+16);
640 AV_ZERO128(h
->mb_luma_dc
[p
]+24);
641 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb_luma_dc
[p
], LUMA_DC_BLOCK_INDEX
+p
, scan
, NULL
, 16) < 0){
642 return -1; //FIXME continue if partitioned and other return -1 too
645 av_assert2((cbp
&15) == 0 || (cbp
&15) == 15);
648 for(i8x8
=0; i8x8
<4; i8x8
++){
649 for(i4x4
=0; i4x4
<4; i4x4
++){
650 const int index
= i4x4
+ 4*i8x8
+ p
*16;
651 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ (16*index
<< pixel_shift
),
652 index
, scan
+ 1, h
->dequant4_coeff
[p
][qscale
], 15) < 0 ){
659 fill_rectangle(&h
->non_zero_count_cache
[scan8
[p
*16]], 4, 4, 8, 0, 1);
663 int cqm
= (IS_INTRA( mb_type
) ? 0:3)+p
;
664 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
666 for(i8x8
=0; i8x8
<4; i8x8
++){
668 if(IS_8x8DCT(mb_type
)){
669 int16_t *buf
= &h
->mb
[64*i8x8
+256*p
<< pixel_shift
];
671 for(i4x4
=0; i4x4
<4; i4x4
++){
672 const int index
= i4x4
+ 4*i8x8
+ p
*16;
673 if( decode_residual(h
, gb
, buf
, index
, scan8x8
+16*i4x4
,
674 h
->dequant8_coeff
[cqm
][qscale
], 16) < 0 )
677 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
+p
*16] ];
678 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
679 new_cbp
|= !!nnz
[0] << i8x8
;
681 for(i4x4
=0; i4x4
<4; i4x4
++){
682 const int index
= i4x4
+ 4*i8x8
+ p
*16;
683 if( decode_residual(h
, gb
, h
->mb
+ (16*index
<< pixel_shift
), index
,
684 scan
, h
->dequant4_coeff
[cqm
][qscale
], 16) < 0 ){
687 new_cbp
|= h
->non_zero_count_cache
[ scan8
[index
] ] << i8x8
;
691 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
+p
*16] ];
692 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
699 int ff_h264_decode_mb_cavlc(H264Context
*h
){
702 unsigned int mb_type
, cbp
;
703 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
704 int decode_chroma
= h
->sps
.chroma_format_idc
== 1 || h
->sps
.chroma_format_idc
== 2;
705 const int pixel_shift
= h
->pixel_shift
;
706 unsigned local_ref_count
[2];
708 mb_xy
= h
->mb_xy
= h
->mb_x
+ h
->mb_y
*h
->mb_stride
;
710 tprintf(h
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, h
->mb_x
, h
->mb_y
);
711 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
713 if(h
->slice_type_nos
!= AV_PICTURE_TYPE_I
){
714 if(h
->mb_skip_run
==-1)
715 h
->mb_skip_run
= get_ue_golomb_long(&h
->gb
);
717 if (h
->mb_skip_run
--) {
718 if(FRAME_MBAFF(h
) && (h
->mb_y
&1) == 0){
719 if(h
->mb_skip_run
==0)
720 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&h
->gb
);
726 if (FRAME_MBAFF(h
)) {
727 if( (h
->mb_y
&1) == 0 )
728 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&h
->gb
);
731 h
->prev_mb_skipped
= 0;
733 mb_type
= get_ue_golomb(&h
->gb
);
734 if(h
->slice_type_nos
== AV_PICTURE_TYPE_B
){
736 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
737 mb_type
= b_mb_type_info
[mb_type
].type
;
740 goto decode_intra_mb
;
742 }else if(h
->slice_type_nos
== AV_PICTURE_TYPE_P
){
744 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
745 mb_type
= p_mb_type_info
[mb_type
].type
;
748 goto decode_intra_mb
;
751 av_assert2(h
->slice_type_nos
== AV_PICTURE_TYPE_I
);
752 if(h
->slice_type
== AV_PICTURE_TYPE_SI
&& mb_type
)
756 av_log(h
->avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_picture_type_char(h
->slice_type
), h
->mb_x
, h
->mb_y
);
760 cbp
= i_mb_type_info
[mb_type
].cbp
;
761 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
762 mb_type
= i_mb_type_info
[mb_type
].type
;
766 mb_type
|= MB_TYPE_INTERLACED
;
768 h
->slice_table
[ mb_xy
]= h
->slice_num
;
770 if(IS_INTRA_PCM(mb_type
)){
771 const int mb_size
= ff_h264_mb_sizes
[h
->sps
.chroma_format_idc
] *
772 h
->sps
.bit_depth_luma
;
774 // We assume these blocks are very rare so we do not optimize it.
775 h
->intra_pcm_ptr
= align_get_bits(&h
->gb
);
776 if (get_bits_left(&h
->gb
) < mb_size
) {
777 av_log(h
->avctx
, AV_LOG_ERROR
, "Not enough data for an intra PCM block.\n");
778 return AVERROR_INVALIDDATA
;
780 skip_bits_long(&h
->gb
, mb_size
);
782 // In deblocking, the quantizer is 0
783 h
->cur_pic
.qscale_table
[mb_xy
] = 0;
784 // All coeffs are present
785 memset(h
->non_zero_count
[mb_xy
], 16, 48);
787 h
->cur_pic
.mb_type
[mb_xy
] = mb_type
;
791 local_ref_count
[0] = h
->ref_count
[0] << MB_MBAFF(h
);
792 local_ref_count
[1] = h
->ref_count
[1] << MB_MBAFF(h
);
794 fill_decode_neighbors(h
, mb_type
);
795 fill_decode_caches(h
, mb_type
);
798 if(IS_INTRA(mb_type
)){
800 // init_top_left_availability(h);
801 if(IS_INTRA4x4(mb_type
)){
804 if(dct8x8_allowed
&& get_bits1(&h
->gb
)){
805 mb_type
|= MB_TYPE_8x8DCT
;
809 // fill_intra4x4_pred_table(h);
810 for(i
=0; i
<16; i
+=di
){
811 int mode
= pred_intra_mode(h
, i
);
813 if(!get_bits1(&h
->gb
)){
814 const int rem_mode
= get_bits(&h
->gb
, 3);
815 mode
= rem_mode
+ (rem_mode
>= mode
);
819 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
821 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
823 write_back_intra_pred_mode(h
);
824 if( ff_h264_check_intra4x4_pred_mode(h
) < 0)
827 h
->intra16x16_pred_mode
= ff_h264_check_intra_pred_mode(h
, h
->intra16x16_pred_mode
, 0);
828 if(h
->intra16x16_pred_mode
< 0)
832 pred_mode
= ff_h264_check_intra_pred_mode(h
, get_ue_golomb_31(&h
->gb
), 1);
835 h
->chroma_pred_mode
= pred_mode
;
837 h
->chroma_pred_mode
= DC_128_PRED8x8
;
839 }else if(partition_count
==4){
840 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
842 if(h
->slice_type_nos
== AV_PICTURE_TYPE_B
){
844 h
->sub_mb_type
[i
]= get_ue_golomb_31(&h
->gb
);
845 if(h
->sub_mb_type
[i
] >=13){
846 av_log(h
->avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], h
->mb_x
, h
->mb_y
);
849 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
850 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
852 if( IS_DIRECT(h
->sub_mb_type
[0]|h
->sub_mb_type
[1]|h
->sub_mb_type
[2]|h
->sub_mb_type
[3])) {
853 ff_h264_pred_direct_motion(h
, &mb_type
);
854 h
->ref_cache
[0][scan8
[4]] =
855 h
->ref_cache
[1][scan8
[4]] =
856 h
->ref_cache
[0][scan8
[12]] =
857 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
860 av_assert2(h
->slice_type_nos
== AV_PICTURE_TYPE_P
); //FIXME SP correct ?
862 h
->sub_mb_type
[i
]= get_ue_golomb_31(&h
->gb
);
863 if(h
->sub_mb_type
[i
] >=4){
864 av_log(h
->avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], h
->mb_x
, h
->mb_y
);
867 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
868 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
872 for(list
=0; list
<h
->list_count
; list
++){
873 int ref_count
= IS_REF0(mb_type
) ? 1 : local_ref_count
[list
];
875 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
876 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
880 }else if(ref_count
== 2){
881 tmp
= get_bits1(&h
->gb
)^1;
883 tmp
= get_ue_golomb_31(&h
->gb
);
885 av_log(h
->avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
898 dct8x8_allowed
= get_dct8x8_allowed(h
);
900 for(list
=0; list
<h
->list_count
; list
++){
902 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
903 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
906 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
907 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
909 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
910 const int sub_mb_type
= h
->sub_mb_type
[i
];
911 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
912 for(j
=0; j
<sub_partition_count
[i
]; j
++){
914 const int index
= 4*i
+ block_width
*j
;
915 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
916 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
917 mx
+= get_se_golomb(&h
->gb
);
918 my
+= get_se_golomb(&h
->gb
);
919 tprintf(h
->avctx
, "final mv:%d %d\n", mx
, my
);
921 if(IS_SUB_8X8(sub_mb_type
)){
923 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
925 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
926 }else if(IS_SUB_8X4(sub_mb_type
)){
927 mv_cache
[ 1 ][0]= mx
;
928 mv_cache
[ 1 ][1]= my
;
929 }else if(IS_SUB_4X8(sub_mb_type
)){
930 mv_cache
[ 8 ][0]= mx
;
931 mv_cache
[ 8 ][1]= my
;
933 mv_cache
[ 0 ][0]= mx
;
934 mv_cache
[ 0 ][1]= my
;
937 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
943 }else if(IS_DIRECT(mb_type
)){
944 ff_h264_pred_direct_motion(h
, &mb_type
);
945 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
948 //FIXME we should set ref_idx_l? to 0 if we use that later ...
949 if(IS_16X16(mb_type
)){
950 for(list
=0; list
<h
->list_count
; list
++){
952 if(IS_DIR(mb_type
, 0, list
)){
953 if(local_ref_count
[list
]==1){
955 } else if(local_ref_count
[list
]==2){
956 val
= get_bits1(&h
->gb
)^1;
958 val
= get_ue_golomb_31(&h
->gb
);
959 if (val
>= local_ref_count
[list
]){
960 av_log(h
->avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
964 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
967 for(list
=0; list
<h
->list_count
; list
++){
968 if(IS_DIR(mb_type
, 0, list
)){
969 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
970 mx
+= get_se_golomb(&h
->gb
);
971 my
+= get_se_golomb(&h
->gb
);
972 tprintf(h
->avctx
, "final mv:%d %d\n", mx
, my
);
974 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
978 else if(IS_16X8(mb_type
)){
979 for(list
=0; list
<h
->list_count
; list
++){
982 if(IS_DIR(mb_type
, i
, list
)){
983 if(local_ref_count
[list
] == 1) {
985 } else if(local_ref_count
[list
] == 2) {
986 val
= get_bits1(&h
->gb
)^1;
988 val
= get_ue_golomb_31(&h
->gb
);
989 if (val
>= local_ref_count
[list
]){
990 av_log(h
->avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
995 val
= LIST_NOT_USED
&0xFF;
996 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
999 for(list
=0; list
<h
->list_count
; list
++){
1002 if(IS_DIR(mb_type
, i
, list
)){
1003 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
1004 mx
+= get_se_golomb(&h
->gb
);
1005 my
+= get_se_golomb(&h
->gb
);
1006 tprintf(h
->avctx
, "final mv:%d %d\n", mx
, my
);
1008 val
= pack16to32(mx
,my
);
1011 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
1015 av_assert2(IS_8X16(mb_type
));
1016 for(list
=0; list
<h
->list_count
; list
++){
1019 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
1020 if(local_ref_count
[list
]==1){
1022 } else if(local_ref_count
[list
]==2){
1023 val
= get_bits1(&h
->gb
)^1;
1025 val
= get_ue_golomb_31(&h
->gb
);
1026 if (val
>= local_ref_count
[list
]){
1027 av_log(h
->avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
1032 val
= LIST_NOT_USED
&0xFF;
1033 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
1036 for(list
=0; list
<h
->list_count
; list
++){
1039 if(IS_DIR(mb_type
, i
, list
)){
1040 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
1041 mx
+= get_se_golomb(&h
->gb
);
1042 my
+= get_se_golomb(&h
->gb
);
1043 tprintf(h
->avctx
, "final mv:%d %d\n", mx
, my
);
1045 val
= pack16to32(mx
,my
);
1048 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
1054 if(IS_INTER(mb_type
))
1055 write_back_motion(h
, mb_type
);
1057 if(!IS_INTRA16x16(mb_type
)){
1058 cbp
= get_ue_golomb(&h
->gb
);
1062 av_log(h
->avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, h
->mb_x
, h
->mb_y
);
1065 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp
[cbp
];
1066 else cbp
= golomb_to_inter_cbp
[cbp
];
1069 av_log(h
->avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, h
->mb_x
, h
->mb_y
);
1072 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp_gray
[cbp
];
1073 else cbp
= golomb_to_inter_cbp_gray
[cbp
];
1076 if (!decode_chroma
&& cbp
>15) {
1077 av_log(h
->avctx
, AV_LOG_ERROR
, "gray chroma\n");
1078 return AVERROR_INVALIDDATA
;
1082 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
1083 mb_type
|= MB_TYPE_8x8DCT
*get_bits1(&h
->gb
);
1086 h
->cbp_table
[mb_xy
]= cbp
;
1087 h
->cur_pic
.mb_type
[mb_xy
] = mb_type
;
1089 if(cbp
|| IS_INTRA16x16(mb_type
)){
1090 int i4x4
, i8x8
, chroma_idx
;
1093 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
1094 const uint8_t *scan
, *scan8x8
;
1095 const int max_qp
= 51 + 6*(h
->sps
.bit_depth_luma
-8);
1097 if(IS_INTERLACED(mb_type
)){
1098 scan8x8
= h
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
1099 scan
= h
->qscale
? h
->field_scan
: h
->field_scan_q0
;
1101 scan8x8
= h
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
1102 scan
= h
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
1105 dquant
= get_se_golomb(&h
->gb
);
1107 h
->qscale
+= dquant
;
1109 if(((unsigned)h
->qscale
) > max_qp
){
1110 if(h
->qscale
<0) h
->qscale
+= max_qp
+1;
1111 else h
->qscale
-= max_qp
+1;
1112 if(((unsigned)h
->qscale
) > max_qp
){
1113 av_log(h
->avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, h
->mb_x
, h
->mb_y
);
1118 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, h
->qscale
);
1119 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, h
->qscale
);
1121 if( (ret
= decode_luma_residual(h
, gb
, scan
, scan8x8
, pixel_shift
, mb_type
, cbp
, 0)) < 0 ){
1124 h
->cbp_table
[mb_xy
] |= ret
<< 12;
1126 if( decode_luma_residual(h
, gb
, scan
, scan8x8
, pixel_shift
, mb_type
, cbp
, 1) < 0 ){
1129 if( decode_luma_residual(h
, gb
, scan
, scan8x8
, pixel_shift
, mb_type
, cbp
, 2) < 0 ){
1133 const int num_c8x8
= h
->sps
.chroma_format_idc
;
1136 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
1137 if (decode_residual(h
, gb
, h
->mb
+ ((256 + 16*16*chroma_idx
) << pixel_shift
),
1138 CHROMA_DC_BLOCK_INDEX
+chroma_idx
,
1139 CHROMA422(h
) ? chroma422_dc_scan
: chroma_dc_scan
,
1140 NULL
, 4*num_c8x8
) < 0) {
1146 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
1147 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
1148 int16_t *mb
= h
->mb
+ (16*(16 + 16*chroma_idx
) << pixel_shift
);
1149 for (i8x8
= 0; i8x8
<num_c8x8
; i8x8
++) {
1150 for (i4x4
= 0; i4x4
< 4; i4x4
++) {
1151 const int index
= 16 + 16*chroma_idx
+ 8*i8x8
+ i4x4
;
1152 if (decode_residual(h
, gb
, mb
, index
, scan
+ 1, qmul
, 15) < 0)
1154 mb
+= 16 << pixel_shift
;
1159 fill_rectangle(&h
->non_zero_count_cache
[scan8
[16]], 4, 4, 8, 0, 1);
1160 fill_rectangle(&h
->non_zero_count_cache
[scan8
[32]], 4, 4, 8, 0, 1);
1164 fill_rectangle(&h
->non_zero_count_cache
[scan8
[ 0]], 4, 4, 8, 0, 1);
1165 fill_rectangle(&h
->non_zero_count_cache
[scan8
[16]], 4, 4, 8, 0, 1);
1166 fill_rectangle(&h
->non_zero_count_cache
[scan8
[32]], 4, 4, 8, 0, 1);
1168 h
->cur_pic
.qscale_table
[mb_xy
] = h
->qscale
;
1169 write_back_non_zero_count(h
);