/*
 * DXVA2 H264 HW acceleration.
 *
 * copyright (c) 2009 Laurent Aimar
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dxva2_internal.h"
/* NOTE(review): two include lines (source lines 24-25) were lost in
 * extraction. H264Context and zigzag_scan are used below and need their
 * declaring headers -- restore them from the upstream file. */
#include "mpegutils.h"

28 struct dxva2_picture_context
{
29 DXVA_PicParams_H264 pp
;
32 DXVA_Slice_H264_Short slice_short
[MAX_SLICES
];
33 DXVA_Slice_H264_Long slice_long
[MAX_SLICES
];
34 const uint8_t *bitstream
;
35 unsigned bitstream_size
;
38 static void fill_picture_entry(DXVA_PicEntry_H264
*pic
,
39 unsigned index
, unsigned flag
)
41 assert((index
&0x7f) == index
&& (flag
&0x01) == flag
);
42 pic
->bPicEntry
= index
| (flag
<< 7);
45 static void fill_picture_parameters(struct dxva_context
*ctx
, const H264Context
*h
,
46 DXVA_PicParams_H264
*pp
)
48 const H264Picture
*current_picture
= h
->cur_pic_ptr
;
51 memset(pp
, 0, sizeof(*pp
));
52 /* Configure current picture */
53 fill_picture_entry(&pp
->CurrPic
,
54 ff_dxva2_get_surface_index(ctx
, ¤t_picture
->f
),
55 h
->picture_structure
== PICT_BOTTOM_FIELD
);
56 /* Configure the set of references */
57 pp
->UsedForReferenceFlags
= 0;
58 pp
->NonExistingFrameFlags
= 0;
59 for (i
= 0, j
= 0; i
< FF_ARRAY_ELEMS(pp
->RefFrameList
); i
++) {
61 if (j
< h
->short_ref_count
) {
62 r
= h
->short_ref
[j
++];
65 while (!r
&& j
< h
->short_ref_count
+ 16)
66 r
= h
->long_ref
[j
++ - h
->short_ref_count
];
69 fill_picture_entry(&pp
->RefFrameList
[i
],
70 ff_dxva2_get_surface_index(ctx
, &r
->f
),
73 if ((r
->reference
& PICT_TOP_FIELD
) && r
->field_poc
[0] != INT_MAX
)
74 pp
->FieldOrderCntList
[i
][0] = r
->field_poc
[0];
75 if ((r
->reference
& PICT_BOTTOM_FIELD
) && r
->field_poc
[1] != INT_MAX
)
76 pp
->FieldOrderCntList
[i
][1] = r
->field_poc
[1];
78 pp
->FrameNumList
[i
] = r
->long_ref
? r
->pic_id
: r
->frame_num
;
79 if (r
->reference
& PICT_TOP_FIELD
)
80 pp
->UsedForReferenceFlags
|= 1 << (2*i
+ 0);
81 if (r
->reference
& PICT_BOTTOM_FIELD
)
82 pp
->UsedForReferenceFlags
|= 1 << (2*i
+ 1);
84 pp
->RefFrameList
[i
].bPicEntry
= 0xff;
85 pp
->FieldOrderCntList
[i
][0] = 0;
86 pp
->FieldOrderCntList
[i
][1] = 0;
87 pp
->FrameNumList
[i
] = 0;
91 pp
->wFrameWidthInMbsMinus1
= h
->mb_width
- 1;
92 pp
->wFrameHeightInMbsMinus1
= h
->mb_height
- 1;
93 pp
->num_ref_frames
= h
->sps
.ref_frame_count
;
95 pp
->wBitFields
= ((h
->picture_structure
!= PICT_FRAME
) << 0) |
97 (h
->picture_structure
== PICT_FRAME
)) << 1) |
98 (h
->sps
.residual_color_transform_flag
<< 2) |
99 /* sp_for_switch_flag (not implemented by FFmpeg) */
101 (h
->sps
.chroma_format_idc
<< 4) |
102 ((h
->nal_ref_idc
!= 0) << 6) |
103 (h
->pps
.constrained_intra_pred
<< 7) |
104 (h
->pps
.weighted_pred
<< 8) |
105 (h
->pps
.weighted_bipred_idc
<< 9) |
106 /* MbsConsecutiveFlag */
108 (h
->sps
.frame_mbs_only_flag
<< 12) |
109 (h
->pps
.transform_8x8_mode
<< 13) |
110 ((h
->sps
.level_idc
>= 31) << 14) |
111 /* IntraPicFlag (Modified if we detect a non
112 * intra slice in dxva2_h264_decode_slice) */
115 pp
->bit_depth_luma_minus8
= h
->sps
.bit_depth_luma
- 8;
116 pp
->bit_depth_chroma_minus8
= h
->sps
.bit_depth_chroma
- 8;
117 if (ctx
->workaround
& FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
)
118 pp
->Reserved16Bits
= 0;
119 else if (ctx
->workaround
& FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
)
120 pp
->Reserved16Bits
= 0x34c;
122 pp
->Reserved16Bits
= 3; /* FIXME is there a way to detect the right mode ? */
123 pp
->StatusReportFeedbackNumber
= 1 + ctx
->report_id
++;
124 pp
->CurrFieldOrderCnt
[0] = 0;
125 if ((h
->picture_structure
& PICT_TOP_FIELD
) &&
126 current_picture
->field_poc
[0] != INT_MAX
)
127 pp
->CurrFieldOrderCnt
[0] = current_picture
->field_poc
[0];
128 pp
->CurrFieldOrderCnt
[1] = 0;
129 if ((h
->picture_structure
& PICT_BOTTOM_FIELD
) &&
130 current_picture
->field_poc
[1] != INT_MAX
)
131 pp
->CurrFieldOrderCnt
[1] = current_picture
->field_poc
[1];
132 pp
->pic_init_qs_minus26
= h
->pps
.init_qs
- 26;
133 pp
->chroma_qp_index_offset
= h
->pps
.chroma_qp_index_offset
[0];
134 pp
->second_chroma_qp_index_offset
= h
->pps
.chroma_qp_index_offset
[1];
135 pp
->ContinuationFlag
= 1;
136 pp
->pic_init_qp_minus26
= h
->pps
.init_qp
- 26;
137 pp
->num_ref_idx_l0_active_minus1
= h
->pps
.ref_count
[0] - 1;
138 pp
->num_ref_idx_l1_active_minus1
= h
->pps
.ref_count
[1] - 1;
139 pp
->Reserved8BitsA
= 0;
140 pp
->frame_num
= h
->frame_num
;
141 pp
->log2_max_frame_num_minus4
= h
->sps
.log2_max_frame_num
- 4;
142 pp
->pic_order_cnt_type
= h
->sps
.poc_type
;
143 if (h
->sps
.poc_type
== 0)
144 pp
->log2_max_pic_order_cnt_lsb_minus4
= h
->sps
.log2_max_poc_lsb
- 4;
145 else if (h
->sps
.poc_type
== 1)
146 pp
->delta_pic_order_always_zero_flag
= h
->sps
.delta_pic_order_always_zero_flag
;
147 pp
->direct_8x8_inference_flag
= h
->sps
.direct_8x8_inference_flag
;
148 pp
->entropy_coding_mode_flag
= h
->pps
.cabac
;
149 pp
->pic_order_present_flag
= h
->pps
.pic_order_present
;
150 pp
->num_slice_groups_minus1
= h
->pps
.slice_group_count
- 1;
151 pp
->slice_group_map_type
= h
->pps
.mb_slice_group_map_type
;
152 pp
->deblocking_filter_control_present_flag
= h
->pps
.deblocking_filter_parameters_present
;
153 pp
->redundant_pic_cnt_present_flag
= h
->pps
.redundant_pic_cnt_present
;
154 pp
->Reserved8BitsB
= 0;
155 pp
->slice_group_change_rate_minus1
= 0; /* XXX not implemented by FFmpeg */
156 //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */
159 static void fill_scaling_lists(struct dxva_context
*ctx
, const H264Context
*h
, DXVA_Qmatrix_H264
*qm
)
162 memset(qm
, 0, sizeof(*qm
));
163 if (ctx
->workaround
& FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
) {
164 for (i
= 0; i
< 6; i
++)
165 for (j
= 0; j
< 16; j
++)
166 qm
->bScalingLists4x4
[i
][j
] = h
->pps
.scaling_matrix4
[i
][j
];
168 for (i
= 0; i
< 64; i
++) {
169 qm
->bScalingLists8x8
[0][i
] = h
->pps
.scaling_matrix8
[0][i
];
170 qm
->bScalingLists8x8
[1][i
] = h
->pps
.scaling_matrix8
[3][i
];
173 for (i
= 0; i
< 6; i
++)
174 for (j
= 0; j
< 16; j
++)
175 qm
->bScalingLists4x4
[i
][j
] = h
->pps
.scaling_matrix4
[i
][zigzag_scan
[j
]];
177 for (i
= 0; i
< 64; i
++) {
178 qm
->bScalingLists8x8
[0][i
] = h
->pps
.scaling_matrix8
[0][ff_zigzag_direct
[i
]];
179 qm
->bScalingLists8x8
[1][i
] = h
->pps
.scaling_matrix8
[3][ff_zigzag_direct
[i
]];
184 static int is_slice_short(struct dxva_context
*ctx
)
186 assert(ctx
->cfg
->ConfigBitstreamRaw
== 1 ||
187 ctx
->cfg
->ConfigBitstreamRaw
== 2);
188 return ctx
->cfg
->ConfigBitstreamRaw
== 2;
191 static void fill_slice_short(DXVA_Slice_H264_Short
*slice
,
192 unsigned position
, unsigned size
)
194 memset(slice
, 0, sizeof(*slice
));
195 slice
->BSNALunitDataLocation
= position
;
196 slice
->SliceBytesInBuffer
= size
;
197 slice
->wBadSliceChopping
= 0;
200 static int get_refpic_index(const DXVA_PicParams_H264
*pp
, int surface_index
)
203 for (i
= 0; i
< FF_ARRAY_ELEMS(pp
->RefFrameList
); i
++) {
204 if ((pp
->RefFrameList
[i
].bPicEntry
& 0x7f) == surface_index
)
210 static void fill_slice_long(AVCodecContext
*avctx
, DXVA_Slice_H264_Long
*slice
,
211 const DXVA_PicParams_H264
*pp
, unsigned position
, unsigned size
)
213 const H264Context
*h
= avctx
->priv_data
;
214 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
217 memset(slice
, 0, sizeof(*slice
));
218 slice
->BSNALunitDataLocation
= position
;
219 slice
->SliceBytesInBuffer
= size
;
220 slice
->wBadSliceChopping
= 0;
222 slice
->first_mb_in_slice
= (h
->mb_y
>> FIELD_OR_MBAFF_PICTURE(h
)) * h
->mb_width
+ h
->mb_x
;
223 slice
->NumMbsForSlice
= 0; /* XXX it is set once we have all slices */
224 slice
->BitOffsetToSliceData
= get_bits_count(&h
->gb
);
225 slice
->slice_type
= ff_h264_get_slice_type(h
);
226 if (h
->slice_type_fixed
)
227 slice
->slice_type
+= 5;
228 slice
->luma_log2_weight_denom
= h
->luma_log2_weight_denom
;
229 slice
->chroma_log2_weight_denom
= h
->chroma_log2_weight_denom
;
230 if (h
->list_count
> 0)
231 slice
->num_ref_idx_l0_active_minus1
= h
->ref_count
[0] - 1;
232 if (h
->list_count
> 1)
233 slice
->num_ref_idx_l1_active_minus1
= h
->ref_count
[1] - 1;
234 slice
->slice_alpha_c0_offset_div2
= h
->slice_alpha_c0_offset
/ 2;
235 slice
->slice_beta_offset_div2
= h
->slice_beta_offset
/ 2;
236 slice
->Reserved8Bits
= 0;
238 for (list
= 0; list
< 2; list
++) {
240 for (i
= 0; i
< FF_ARRAY_ELEMS(slice
->RefPicList
[list
]); i
++) {
241 if (list
< h
->list_count
&& i
< h
->ref_count
[list
]) {
242 const H264Picture
*r
= &h
->ref_list
[list
][i
];
245 if (ctx
->workaround
& FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
)
246 index
= ff_dxva2_get_surface_index(ctx
, &r
->f
);
248 index
= get_refpic_index(pp
, ff_dxva2_get_surface_index(ctx
, &r
->f
));
249 fill_picture_entry(&slice
->RefPicList
[list
][i
], index
,
250 r
->reference
== PICT_BOTTOM_FIELD
);
251 for (plane
= 0; plane
< 3; plane
++) {
253 if (plane
== 0 && h
->luma_weight_flag
[list
]) {
254 w
= h
->luma_weight
[i
][list
][0];
255 o
= h
->luma_weight
[i
][list
][1];
256 } else if (plane
>= 1 && h
->chroma_weight_flag
[list
]) {
257 w
= h
->chroma_weight
[i
][list
][plane
-1][0];
258 o
= h
->chroma_weight
[i
][list
][plane
-1][1];
260 w
= 1 << (plane
== 0 ? h
->luma_log2_weight_denom
:
261 h
->chroma_log2_weight_denom
);
264 slice
->Weights
[list
][i
][plane
][0] = w
;
265 slice
->Weights
[list
][i
][plane
][1] = o
;
269 slice
->RefPicList
[list
][i
].bPicEntry
= 0xff;
270 for (plane
= 0; plane
< 3; plane
++) {
271 slice
->Weights
[list
][i
][plane
][0] = 0;
272 slice
->Weights
[list
][i
][plane
][1] = 0;
277 slice
->slice_qs_delta
= 0; /* XXX not implemented by FFmpeg */
278 slice
->slice_qp_delta
= h
->qscale
- h
->pps
.init_qp
;
279 slice
->redundant_pic_cnt
= h
->redundant_pic_count
;
280 if (h
->slice_type
== AV_PICTURE_TYPE_B
)
281 slice
->direct_spatial_mv_pred_flag
= h
->direct_spatial_mv_pred
;
282 slice
->cabac_init_idc
= h
->pps
.cabac
? h
->cabac_init_idc
: 0;
283 if (h
->deblocking_filter
< 2)
284 slice
->disable_deblocking_filter_idc
= 1 - h
->deblocking_filter
;
286 slice
->disable_deblocking_filter_idc
= h
->deblocking_filter
;
287 slice
->slice_id
= h
->current_slice
- 1;
290 static int commit_bitstream_and_slice_buffer(AVCodecContext
*avctx
,
291 DXVA2_DecodeBufferDesc
*bs
,
292 DXVA2_DecodeBufferDesc
*sc
)
294 const H264Context
*h
= avctx
->priv_data
;
295 const unsigned mb_count
= h
->mb_width
* h
->mb_height
;
296 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
297 const H264Picture
*current_picture
= h
->cur_pic_ptr
;
298 struct dxva2_picture_context
*ctx_pic
= current_picture
->hwaccel_picture_private
;
299 DXVA_Slice_H264_Short
*slice
= NULL
;
301 uint8_t *dxva_data
, *current
, *end
;
308 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */
309 if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx
->decoder
,
310 DXVA2_BitStreamDateBufferType
,
311 &dxva_data_ptr
, &dxva_size
)))
314 dxva_data
= dxva_data_ptr
;
316 end
= dxva_data
+ dxva_size
;
318 for (i
= 0; i
< ctx_pic
->slice_count
; i
++) {
319 static const uint8_t start_code
[] = { 0, 0, 1 };
320 static const unsigned start_code_size
= sizeof(start_code
);
321 unsigned position
, size
;
323 assert(offsetof(DXVA_Slice_H264_Short
, BSNALunitDataLocation
) ==
324 offsetof(DXVA_Slice_H264_Long
, BSNALunitDataLocation
));
325 assert(offsetof(DXVA_Slice_H264_Short
, SliceBytesInBuffer
) ==
326 offsetof(DXVA_Slice_H264_Long
, SliceBytesInBuffer
));
328 if (is_slice_short(ctx
))
329 slice
= &ctx_pic
->slice_short
[i
];
331 slice
= (DXVA_Slice_H264_Short
*)&ctx_pic
->slice_long
[i
];
333 position
= slice
->BSNALunitDataLocation
;
334 size
= slice
->SliceBytesInBuffer
;
335 if (start_code_size
+ size
> end
- current
) {
336 av_log(avctx
, AV_LOG_ERROR
, "Failed to build bitstream");
340 slice
->BSNALunitDataLocation
= current
- dxva_data
;
341 slice
->SliceBytesInBuffer
= start_code_size
+ size
;
343 if (!is_slice_short(ctx
)) {
344 DXVA_Slice_H264_Long
*slice_long
= (DXVA_Slice_H264_Long
*)slice
;
345 if (i
< ctx_pic
->slice_count
- 1)
346 slice_long
->NumMbsForSlice
=
347 slice_long
[1].first_mb_in_slice
- slice_long
[0].first_mb_in_slice
;
349 slice_long
->NumMbsForSlice
= mb_count
- slice_long
->first_mb_in_slice
;
352 memcpy(current
, start_code
, start_code_size
);
353 current
+= start_code_size
;
355 memcpy(current
, &ctx_pic
->bitstream
[position
], size
);
358 padding
= FFMIN(128 - ((current
- dxva_data
) & 127), end
- current
);
359 if (slice
&& padding
> 0) {
360 memset(current
, 0, padding
);
363 slice
->SliceBytesInBuffer
+= padding
;
365 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx
->decoder
,
366 DXVA2_BitStreamDateBufferType
)))
368 if (i
< ctx_pic
->slice_count
)
371 memset(bs
, 0, sizeof(*bs
));
372 bs
->CompressedBufferType
= DXVA2_BitStreamDateBufferType
;
373 bs
->DataSize
= current
- dxva_data
;
374 bs
->NumMBsInBuffer
= mb_count
;
376 if (is_slice_short(ctx
)) {
377 slice_data
= ctx_pic
->slice_short
;
378 slice_size
= ctx_pic
->slice_count
* sizeof(*ctx_pic
->slice_short
);
380 slice_data
= ctx_pic
->slice_long
;
381 slice_size
= ctx_pic
->slice_count
* sizeof(*ctx_pic
->slice_long
);
383 assert((bs
->DataSize
& 127) == 0);
384 return ff_dxva2_commit_buffer(avctx
, ctx
, sc
,
385 DXVA2_SliceControlBufferType
,
386 slice_data
, slice_size
, mb_count
);
390 static int dxva2_h264_start_frame(AVCodecContext
*avctx
,
391 av_unused
const uint8_t *buffer
,
392 av_unused
uint32_t size
)
394 const H264Context
*h
= avctx
->priv_data
;
395 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
396 struct dxva2_picture_context
*ctx_pic
= h
->cur_pic_ptr
->hwaccel_picture_private
;
398 if (!ctx
->decoder
|| !ctx
->cfg
|| ctx
->surface_count
<= 0)
402 /* Fill up DXVA_PicParams_H264 */
403 fill_picture_parameters(ctx
, h
, &ctx_pic
->pp
);
405 /* Fill up DXVA_Qmatrix_H264 */
406 fill_scaling_lists(ctx
, h
, &ctx_pic
->qm
);
408 ctx_pic
->slice_count
= 0;
409 ctx_pic
->bitstream_size
= 0;
410 ctx_pic
->bitstream
= NULL
;
414 static int dxva2_h264_decode_slice(AVCodecContext
*avctx
,
415 const uint8_t *buffer
,
418 const H264Context
*h
= avctx
->priv_data
;
419 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
420 const H264Picture
*current_picture
= h
->cur_pic_ptr
;
421 struct dxva2_picture_context
*ctx_pic
= current_picture
->hwaccel_picture_private
;
424 if (ctx_pic
->slice_count
>= MAX_SLICES
)
427 if (!ctx_pic
->bitstream
)
428 ctx_pic
->bitstream
= buffer
;
429 ctx_pic
->bitstream_size
+= size
;
431 position
= buffer
- ctx_pic
->bitstream
;
432 if (is_slice_short(ctx
))
433 fill_slice_short(&ctx_pic
->slice_short
[ctx_pic
->slice_count
],
436 fill_slice_long(avctx
, &ctx_pic
->slice_long
[ctx_pic
->slice_count
],
437 &ctx_pic
->pp
, position
, size
);
438 ctx_pic
->slice_count
++;
440 if (h
->slice_type
!= AV_PICTURE_TYPE_I
&& h
->slice_type
!= AV_PICTURE_TYPE_SI
)
441 ctx_pic
->pp
.wBitFields
&= ~(1 << 15); /* Set IntraPicFlag to 0 */
445 static int dxva2_h264_end_frame(AVCodecContext
*avctx
)
447 H264Context
*h
= avctx
->priv_data
;
448 struct dxva2_picture_context
*ctx_pic
=
449 h
->cur_pic_ptr
->hwaccel_picture_private
;
452 if (ctx_pic
->slice_count
<= 0 || ctx_pic
->bitstream_size
<= 0)
454 ret
= ff_dxva2_common_end_frame(avctx
, &h
->cur_pic_ptr
->f
,
455 &ctx_pic
->pp
, sizeof(ctx_pic
->pp
),
456 &ctx_pic
->qm
, sizeof(ctx_pic
->qm
),
457 commit_bitstream_and_slice_buffer
);
459 ff_h264_draw_horiz_band(h
, 0, h
->avctx
->height
);
463 AVHWAccel ff_h264_dxva2_hwaccel
= {
464 .name
= "h264_dxva2",
465 .type
= AVMEDIA_TYPE_VIDEO
,
466 .id
= AV_CODEC_ID_H264
,
467 .pix_fmt
= AV_PIX_FMT_DXVA2_VLD
,
468 .start_frame
= dxva2_h264_start_frame
,
469 .decode_slice
= dxva2_h264_decode_slice
,
470 .end_frame
= dxva2_h264_end_frame
,
471 .frame_priv_data_size
= sizeof(struct dxva2_picture_context
),