2 * H.26L/H.264/AVC/JVT/14496-10/... parser
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 parser.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #define UNCHECKED_BITSTREAM_READER 1
30 #include "libavutil/attributes.h"
35 #include "mpegutils.h"
38 static int h264_find_frame_end(H264Context
*h
, const uint8_t *buf
,
43 ParseContext
*pc
= &h
->parse_context
;
44 int next_avc
= h
->is_avc
? 0 : buf_size
;
46 // mb_addr= pc->mb_addr - 1;
51 if (h
->is_avc
&& !h
->nal_length_size
)
52 av_log(h
->avctx
, AV_LOG_ERROR
, "AVC-parser: nal length size invalid\n");
54 for (i
= 0; i
< buf_size
; i
++) {
58 for (j
= 0; j
< h
->nal_length_size
; j
++)
59 nalsize
= (nalsize
<< 8) | buf
[i
++];
60 if (nalsize
<= 0 || nalsize
> buf_size
- i
) {
61 av_log(h
->avctx
, AV_LOG_ERROR
, "AVC-parser: nal size %d remaining %d\n", nalsize
, buf_size
- i
);
64 next_avc
= i
+ nalsize
;
69 i
+= h
->h264dsp
.startcode_find_candidate(buf
+ i
, next_avc
- i
);
72 } else if (state
<= 2) {
74 state
^= 5; // 2->7, 1->4, 0->5
78 state
>>= 1; // 2->1, 1->0, 0->0
79 } else if (state
<= 5) {
80 int nalu_type
= buf
[i
] & 0x1F;
81 if (nalu_type
== NAL_SEI
|| nalu_type
== NAL_SPS
||
82 nalu_type
== NAL_PPS
|| nalu_type
== NAL_AUD
) {
83 if (pc
->frame_start_found
) {
87 } else if (nalu_type
== NAL_SLICE
|| nalu_type
== NAL_DPA
||
88 nalu_type
== NAL_IDR_SLICE
) {
94 h
->parse_history
[h
->parse_history_count
++]= buf
[i
];
95 if (h
->parse_history_count
>5) {
96 unsigned int mb
, last_mb
= h
->parse_last_mb
;
99 init_get_bits(&gb
, h
->parse_history
, 8*h
->parse_history_count
);
100 h
->parse_history_count
=0;
101 mb
= get_ue_golomb_long(&gb
);
102 h
->parse_last_mb
= mb
;
103 if (pc
->frame_start_found
) {
107 pc
->frame_start_found
= 1;
115 return END_NOT_FOUND
;
119 pc
->frame_start_found
= 0;
122 return i
- (state
& 5) - 5 * (state
> 7);
125 static int scan_mmco_reset(AVCodecParserContext
*s
)
127 H264Context
*h
= s
->priv_data
;
129 h
->slice_type_nos
= s
->pict_type
& 3;
131 if (h
->pps
.redundant_pic_cnt_present
)
132 get_ue_golomb(&h
->gb
); // redundant_pic_count
134 if (ff_set_ref_count(h
) < 0)
135 return AVERROR_INVALIDDATA
;
137 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_I
) {
139 for (list
= 0; list
< h
->list_count
; list
++) {
140 if (get_bits1(&h
->gb
)) {
142 for (index
= 0; ; index
++) {
143 unsigned int reordering_of_pic_nums_idc
= get_ue_golomb_31(&h
->gb
);
145 if (reordering_of_pic_nums_idc
< 3)
146 get_ue_golomb(&h
->gb
);
147 else if (reordering_of_pic_nums_idc
> 3) {
148 av_log(h
->avctx
, AV_LOG_ERROR
,
149 "illegal reordering_of_pic_nums_idc %d\n",
150 reordering_of_pic_nums_idc
);
151 return AVERROR_INVALIDDATA
;
155 if (index
>= h
->ref_count
[list
]) {
156 av_log(h
->avctx
, AV_LOG_ERROR
,
157 "reference count %d overflow\n", index
);
158 return AVERROR_INVALIDDATA
;
165 if ((h
->pps
.weighted_pred
&& h
->slice_type_nos
== AV_PICTURE_TYPE_P
) ||
166 (h
->pps
.weighted_bipred_idc
== 1 && h
->slice_type_nos
== AV_PICTURE_TYPE_B
))
167 ff_pred_weight_table(h
);
169 if (get_bits1(&h
->gb
)) { // adaptive_ref_pic_marking_mode_flag
171 for (i
= 0; i
< MAX_MMCO_COUNT
; i
++) {
172 MMCOOpcode opcode
= get_ue_golomb_31(&h
->gb
);
173 if (opcode
> (unsigned) MMCO_LONG
) {
174 av_log(h
->avctx
, AV_LOG_ERROR
,
175 "illegal memory management control operation %d\n",
177 return AVERROR_INVALIDDATA
;
179 if (opcode
== MMCO_END
)
181 else if (opcode
== MMCO_RESET
)
184 if (opcode
== MMCO_SHORT2UNUSED
|| opcode
== MMCO_SHORT2LONG
)
185 get_ue_golomb(&h
->gb
);
186 if (opcode
== MMCO_SHORT2LONG
|| opcode
== MMCO_LONG2UNUSED
||
187 opcode
== MMCO_LONG
|| opcode
== MMCO_SET_MAX_LONG
)
188 get_ue_golomb_31(&h
->gb
);
196 * Parse NAL units of found picture and decode some basic information.
198 * @param s parser context.
199 * @param avctx codec context.
200 * @param buf buffer with field/frame data.
201 * @param buf_size size of the buffer.
203 static inline int parse_nal_units(AVCodecParserContext
*s
,
204 AVCodecContext
*avctx
,
205 const uint8_t * const buf
, int buf_size
)
207 H264Context
*h
= s
->priv_data
;
208 int buf_index
, next_avc
;
210 unsigned int slice_type
;
211 int state
= -1, got_reset
= 0;
213 int q264
= buf_size
>=4 && !memcmp("Q264", buf
, 4);
216 /* set some sane default values */
217 s
->pict_type
= AV_PICTURE_TYPE_I
;
219 s
->picture_structure
= AV_PICTURE_STRUCTURE_UNKNOWN
;
222 ff_h264_reset_sei(h
);
223 h
->sei_fpa
.frame_packing_arrangement_cancel_flag
= -1;
229 next_avc
= h
->is_avc
? 0 : buf_size
;
231 int src_length
, dst_length
, consumed
, nalsize
= 0;
233 if (buf_index
>= next_avc
) {
234 nalsize
= get_avc_nalsize(h
, buf
, buf_size
, &buf_index
);
237 next_avc
= buf_index
+ nalsize
;
239 buf_index
= find_start_code(buf
, buf_size
, buf_index
, next_avc
);
240 if (buf_index
>= buf_size
)
242 if (buf_index
>= next_avc
)
245 src_length
= next_avc
- buf_index
;
247 state
= buf
[buf_index
];
248 switch (state
& 0x1f) {
251 // Do not walk the whole buffer just to decode slice header
252 if ((state
& 0x1f) == NAL_IDR_SLICE
|| ((state
>> 5) & 0x3) == 0) {
253 /* IDR or disposable slice
254 * No need to decode many bytes because MMCOs shall not be present. */
258 /* To decode up to MMCOs */
259 if (src_length
> 1000)
264 ptr
= ff_h264_decode_nal(h
, buf
+ buf_index
, &dst_length
,
265 &consumed
, src_length
);
266 if (!ptr
|| dst_length
< 0)
269 buf_index
+= consumed
;
271 init_get_bits(&h
->gb
, ptr
, 8 * dst_length
);
272 switch (h
->nal_unit_type
) {
274 ff_h264_decode_seq_parameter_set(h
);
277 ff_h264_decode_picture_parameter_set(h
, h
->gb
.size_in_bits
);
280 ff_h264_decode_sei(h
);
285 h
->prev_frame_num
= 0;
286 h
->prev_frame_num_offset
= 0;
291 get_ue_golomb_long(&h
->gb
); // skip first_mb_in_slice
292 slice_type
= get_ue_golomb_31(&h
->gb
);
293 s
->pict_type
= golomb_to_pict_type
[slice_type
% 5];
294 if (h
->sei_recovery_frame_cnt
>= 0) {
295 /* key frame, since recovery_frame_cnt is set */
298 pps_id
= get_ue_golomb(&h
->gb
);
299 if (pps_id
>= MAX_PPS_COUNT
) {
300 av_log(h
->avctx
, AV_LOG_ERROR
,
301 "pps_id %u out of range\n", pps_id
);
304 if (!h
->pps_buffers
[pps_id
]) {
305 av_log(h
->avctx
, AV_LOG_ERROR
,
306 "non-existing PPS %u referenced\n", pps_id
);
309 h
->pps
= *h
->pps_buffers
[pps_id
];
310 if (!h
->sps_buffers
[h
->pps
.sps_id
]) {
311 av_log(h
->avctx
, AV_LOG_ERROR
,
312 "non-existing SPS %u referenced\n", h
->pps
.sps_id
);
315 h
->sps
= *h
->sps_buffers
[h
->pps
.sps_id
];
316 h
->frame_num
= get_bits(&h
->gb
, h
->sps
.log2_max_frame_num
);
318 if(h
->sps
.ref_frame_count
<= 1 && h
->pps
.ref_count
[0] <= 1 && s
->pict_type
== AV_PICTURE_TYPE_I
)
321 avctx
->profile
= ff_h264_get_profile(&h
->sps
);
322 avctx
->level
= h
->sps
.level_idc
;
324 if (h
->sps
.frame_mbs_only_flag
) {
325 h
->picture_structure
= PICT_FRAME
;
327 if (get_bits1(&h
->gb
)) { // field_pic_flag
328 h
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&h
->gb
); // bottom_field_flag
330 h
->picture_structure
= PICT_FRAME
;
334 if (h
->nal_unit_type
== NAL_IDR_SLICE
)
335 get_ue_golomb(&h
->gb
); /* idr_pic_id */
336 if (h
->sps
.poc_type
== 0) {
337 h
->poc_lsb
= get_bits(&h
->gb
, h
->sps
.log2_max_poc_lsb
);
339 if (h
->pps
.pic_order_present
== 1 &&
340 h
->picture_structure
== PICT_FRAME
)
341 h
->delta_poc_bottom
= get_se_golomb(&h
->gb
);
344 if (h
->sps
.poc_type
== 1 &&
345 !h
->sps
.delta_pic_order_always_zero_flag
) {
346 h
->delta_poc
[0] = get_se_golomb(&h
->gb
);
348 if (h
->pps
.pic_order_present
== 1 &&
349 h
->picture_structure
== PICT_FRAME
)
350 h
->delta_poc
[1] = get_se_golomb(&h
->gb
);
353 /* Decode POC of this picture.
354 * The prev_ values needed for decoding POC of the next picture are not set here. */
355 field_poc
[0] = field_poc
[1] = INT_MAX
;
356 ff_init_poc(h
, field_poc
, &s
->output_picture_number
);
358 /* Continue parsing to check if MMCO_RESET is present.
359 * FIXME: MMCO_RESET could appear in non-first slice.
360 * Maybe, we should parse all undisposable non-IDR slice of this
361 * picture until encountering MMCO_RESET in a slice of it. */
362 if (h
->nal_ref_idc
&& h
->nal_unit_type
!= NAL_IDR_SLICE
) {
363 got_reset
= scan_mmco_reset(s
);
368 /* Set up the prev_ values for decoding POC of the next picture. */
369 h
->prev_frame_num
= got_reset
? 0 : h
->frame_num
;
370 h
->prev_frame_num_offset
= got_reset
? 0 : h
->frame_num_offset
;
371 if (h
->nal_ref_idc
!= 0) {
373 h
->prev_poc_msb
= h
->poc_msb
;
374 h
->prev_poc_lsb
= h
->poc_lsb
;
378 h
->picture_structure
== PICT_BOTTOM_FIELD
? 0 : field_poc
[0];
382 if (h
->sps
.pic_struct_present_flag
) {
383 switch (h
->sei_pic_struct
) {
384 case SEI_PIC_STRUCT_TOP_FIELD
:
385 case SEI_PIC_STRUCT_BOTTOM_FIELD
:
388 case SEI_PIC_STRUCT_FRAME
:
389 case SEI_PIC_STRUCT_TOP_BOTTOM
:
390 case SEI_PIC_STRUCT_BOTTOM_TOP
:
393 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
394 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
397 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
400 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
404 s
->repeat_pict
= h
->picture_structure
== PICT_FRAME
? 1 : 0;
408 s
->repeat_pict
= h
->picture_structure
== PICT_FRAME
? 1 : 0;
411 if (h
->picture_structure
== PICT_FRAME
) {
412 s
->picture_structure
= AV_PICTURE_STRUCTURE_FRAME
;
413 if (h
->sps
.pic_struct_present_flag
) {
414 switch (h
->sei_pic_struct
) {
415 case SEI_PIC_STRUCT_TOP_BOTTOM
:
416 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
417 s
->field_order
= AV_FIELD_TT
;
419 case SEI_PIC_STRUCT_BOTTOM_TOP
:
420 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
421 s
->field_order
= AV_FIELD_BB
;
424 s
->field_order
= AV_FIELD_PROGRESSIVE
;
428 if (field_poc
[0] < field_poc
[1])
429 s
->field_order
= AV_FIELD_TT
;
430 else if (field_poc
[0] > field_poc
[1])
431 s
->field_order
= AV_FIELD_BB
;
433 s
->field_order
= AV_FIELD_PROGRESSIVE
;
436 if (h
->picture_structure
== PICT_TOP_FIELD
)
437 s
->picture_structure
= AV_PICTURE_STRUCTURE_TOP_FIELD
;
439 s
->picture_structure
= AV_PICTURE_STRUCTURE_BOTTOM_FIELD
;
440 s
->field_order
= AV_FIELD_UNKNOWN
;
443 return 0; /* no need to evaluate the rest */
448 /* didn't find a picture! */
449 av_log(h
->avctx
, AV_LOG_ERROR
, "missing picture in access unit with size %d\n", buf_size
);
453 static int h264_parse(AVCodecParserContext
*s
,
454 AVCodecContext
*avctx
,
455 const uint8_t **poutbuf
, int *poutbuf_size
,
456 const uint8_t *buf
, int buf_size
)
458 H264Context
*h
= s
->priv_data
;
459 ParseContext
*pc
= &h
->parse_context
;
464 if (avctx
->extradata_size
) {
466 // must be done like in decoder, otherwise opening the parser,
467 // letting it create extradata and then closing and opening again
468 // will cause has_b_frames to be always set.
469 // Note that estimate_timings_from_pts does exactly this.
470 if (!avctx
->has_b_frames
)
472 ff_h264_decode_extradata(h
, avctx
->extradata
, avctx
->extradata_size
);
476 if (s
->flags
& PARSER_FLAG_COMPLETE_FRAMES
) {
479 next
= h264_find_frame_end(h
, buf
, buf_size
);
481 if (ff_combine_frame(pc
, next
, &buf
, &buf_size
) < 0) {
487 if (next
< 0 && next
!= END_NOT_FOUND
) {
488 av_assert1(pc
->last_index
+ next
>= 0);
489 h264_find_frame_end(h
, &pc
->buffer
[pc
->last_index
+ next
], -next
); // update state
493 parse_nal_units(s
, avctx
, buf
, buf_size
);
495 if (avctx
->framerate
.num
)
496 avctx
->time_base
= av_inv_q(av_mul_q(avctx
->framerate
, (AVRational
){avctx
->ticks_per_frame
, 1}));
497 if (h
->sei_cpb_removal_delay
>= 0) {
498 s
->dts_sync_point
= h
->sei_buffering_period_present
;
499 s
->dts_ref_dts_delta
= h
->sei_cpb_removal_delay
;
500 s
->pts_dts_delta
= h
->sei_dpb_output_delay
;
502 s
->dts_sync_point
= INT_MIN
;
503 s
->dts_ref_dts_delta
= INT_MIN
;
504 s
->pts_dts_delta
= INT_MIN
;
507 if (s
->flags
& PARSER_FLAG_ONCE
) {
508 s
->flags
&= PARSER_FLAG_COMPLETE_FRAMES
;
512 *poutbuf_size
= buf_size
;
516 static int h264_split(AVCodecContext
*avctx
,
517 const uint8_t *buf
, int buf_size
)
524 for (i
= 0; i
<= buf_size
; i
++) {
525 if ((state
& 0xFFFFFF1F) == 0x107)
527 if ((state
& 0xFFFFFF1F) == 0x108)
529 /* if ((state&0xFFFFFF1F) == 0x101 ||
530 * (state&0xFFFFFF1F) == 0x102 ||
531 * (state&0xFFFFFF1F) == 0x105) {
534 if ((state
& 0xFFFFFF00) == 0x100 && ((state
& 0xFFFFFF1F) != 0x106 || has_pps
) &&
535 (state
& 0xFFFFFF1F) != 0x107 && (state
& 0xFFFFFF1F) != 0x108 &&
536 (state
& 0xFFFFFF1F) != 0x109 && (state
& 0xFFFFFF1F) != 0x10d &&
537 (state
& 0xFFFFFF1F) != 0x10f) {
539 while (i
> 4 && buf
[i
- 5] == 0)
545 state
= (state
<< 8) | buf
[i
];
550 static void close(AVCodecParserContext
*s
)
552 H264Context
*h
= s
->priv_data
;
553 ParseContext
*pc
= &h
->parse_context
;
555 av_freep(&pc
->buffer
);
556 ff_h264_free_context(h
);
559 static av_cold
int init(AVCodecParserContext
*s
)
561 H264Context
*h
= s
->priv_data
;
562 h
->thread_context
[0] = h
;
563 h
->slice_context_count
= 1;
564 ff_h264dsp_init(&h
->h264dsp
, 8, 1);
568 AVCodecParser ff_h264_parser
= {
569 .codec_ids
= { AV_CODEC_ID_H264
},
570 .priv_data_size
= sizeof(H264Context
),
572 .parser_parse
= h264_parse
,
573 .parser_close
= close
,