2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Template helpers: FUNC() builds the name of each generated function and
 * PIXEL_SHIFT supplies the per-sample shift used in address arithmetic.
 * Two alternative definitions of each macro follow (a "_simple_<BITS>"
 * flavour with a compile-time shift, and a "_complex" flavour reading
 * h->pixel_shift).
 * NOTE(review): the preprocessor conditional choosing between them is not
 * visible here - presumably it tests SIMPLE; confirm. */
28 # define FUNC(n) AV_JOIN(n ## _simple_, BITS)
29 # define PIXEL_SHIFT (BITS >> 4)
31 # define FUNC(n) n ## _complex
32 # define PIXEL_SHIFT h->pixel_shift
/* The motion-compensation template is included twice.
 * NOTE(review): presumably once per chroma layout (the code below calls
 * hl_motion_420 and hl_motion_422); the defines that differ between the
 * two inclusions are not visible here. */
37 #include "h264_mc_template.c"
41 #include "h264_mc_template.c"
/**
 * Reconstruct the current macroblock (h->mb_x, h->mb_y, h->mb_xy) into the
 * three planes of h->cur_pic, using separate luma and chroma strides.
 *
 * Steps visible in this function, in order:
 *  - compute the luma/Cb/Cr destination pointers and issue prefetches;
 *  - store h->list_count into h->list_counts[mb_xy];
 *  - choose line strides and the block offset table (field vs. non-field);
 *  - intra PCM macroblocks: copy raw samples from h->intra_pcm_ptr;
 *  - intra macroblocks: chroma and luma prediction;
 *  - other macroblocks (H.264): motion compensation (hl_motion_422/420);
 *  - luma residual via hl_decode_mb_idct_luma, then chroma residual.
 *
 * NOTE(review): several controlling conditions (else branches, the 4:2:2
 * checks, closing braces) are not visible in this view; comments below
 * flag the places where the selecting condition is an assumption.
 *
 * @param h decoder context; read and written in place (e.g. h->mb_linesize,
 *          h->mb_uvlinesize, h->list_counts[], h->ref_cache[][]).
 */
43 static av_noinline
void FUNC(hl_decode_mb
)(H264Context
*h
)
45 const int mb_x
= h
->mb_x
;
46 const int mb_y
= h
->mb_y
;
47 const int mb_xy
= h
->mb_xy
;
48 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
49 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
50 int linesize
, uvlinesize
/*dct_offset*/;
52 int *block_offset
= &h
->block_offset
[0];
/* Transform bypass is only ever enabled in the non-SIMPLE build, and then
 * only when qscale is 0 and the SPS flag is set. */
53 const int transform_bypass
= !SIMPLE
&& (h
->qscale
== 0 && h
->sps
.transform_bypass
);
54 /* is_h264 should always be true if SVQ3 is disabled. */
55 const int is_h264
= !CONFIG_SVQ3_DECODER
|| SIMPLE
|| h
->avctx
->codec_id
== AV_CODEC_ID_H264
;
56 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
/* Chroma block height in rows: 16 reduced by the vertical chroma shift. */
57 const int block_h
= 16 >> h
->chroma_y_shift
;
58 const int chroma422
= CHROMA422(h
);
/* Destination of this macroblock in each plane: 16 luma columns/rows per
 * macroblock, 8 chroma columns and block_h chroma rows per macroblock;
 * the horizontal position is scaled by PIXEL_SHIFT. */
60 dest_y
= h
->cur_pic
.f
.data
[0] + ((mb_x
<< PIXEL_SHIFT
) + mb_y
* h
->linesize
) * 16;
61 dest_cb
= h
->cur_pic
.f
.data
[1] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* h
->uvlinesize
* block_h
;
62 dest_cr
= h
->cur_pic
.f
.data
[2] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* h
->uvlinesize
* block_h
;
/* Prefetch hints relative to the luma and chroma destinations; the chroma
 * call passes the Cb-to-Cr pointer distance as its second argument. */
64 h
->vdsp
.prefetch(dest_y
+ (h
->mb_x
& 3) * 4 * h
->linesize
+ (64 << PIXEL_SHIFT
), h
->linesize
, 4);
65 h
->vdsp
.prefetch(dest_cb
+ (h
->mb_x
& 7) * h
->uvlinesize
+ (64 << PIXEL_SHIFT
), dest_cr
- dest_cb
, 2);
/* Remember the current list count for this macroblock index. */
67 h
->list_counts
[mb_xy
] = h
->list_count
;
/* Field macroblock (non-SIMPLE builds only): double both strides and
 * switch to the block offsets starting at index 48. */
69 if (!SIMPLE
&& MB_FIELD(h
)) {
70 linesize
= h
->mb_linesize
= h
->linesize
* 2;
71 uvlinesize
= h
->mb_uvlinesize
= h
->uvlinesize
* 2;
72 block_offset
= &h
->block_offset
[48];
/* Odd macroblock row: move the destinations back up by (rows - 1) picture
 * lines (15 for luma, block_h - 1 for chroma). */
73 if (mb_y
& 1) { // FIXME move out of this function?
74 dest_y
-= h
->linesize
* 15;
75 dest_cb
-= h
->uvlinesize
* (block_h
- 1);
76 dest_cr
-= h
->uvlinesize
* (block_h
- 1);
/* For each reference list, rewrite cached reference indices in place as
 * (16 + ref) ^ (mb_y & 1).
 * NOTE(review): the enclosing condition and the statement executed when
 * USES_LIST() is false are not visible here - presumably an MBAFF check
 * and a skip to the next list; confirm. */
80 for (list
= 0; list
< h
->list_count
; list
++) {
81 if (!USES_LIST(mb_type
, list
))
/* 16x16 macroblock: one 4, 4 fill_rectangle call starting at scan8[0]. */
83 if (IS_16X16(mb_type
)) {
84 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
85 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (h
->mb_y
& 1), 1);
/* Other partitionings: one 2, 2 fill_rectangle call per group of four
 * blocks (i = 0, 4, 8, 12). */
87 for (i
= 0; i
< 16; i
+= 4) {
88 int ref
= h
->ref_cache
[list
][scan8
[i
]];
90 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2,
91 8, (16 + ref
) ^ (h
->mb_y
& 1), 1);
/* Picture strides used unchanged.
 * NOTE(review): presumably the else branch of the field check above (the
 * "else" itself is not visible here). */
97 linesize
= h
->mb_linesize
= h
->linesize
;
98 uvlinesize
= h
->mb_uvlinesize
= h
->uvlinesize
;
99 // dct_offset = s->linesize * 16;
/* Intra PCM (non-SIMPLE builds only): samples are taken directly from
 * h->intra_pcm_ptr instead of being predicted and transformed. */
102 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
103 const int bit_depth
= h
->sps
.bit_depth_luma
;
/* 16-bit storage path: read bit_depth bits per sample through a bit
 * reader sized by ff_h264_mb_sizes[chroma_format_idc] * bit_depth.
 * NOTE(review): presumably selected when PIXEL_SHIFT is non-zero; the
 * condition is not visible here. */
107 init_get_bits(&gb
, h
->intra_pcm_ptr
,
108 ff_h264_mb_sizes
[h
->sps
.chroma_format_idc
] * bit_depth
);
/* 16 rows of 16 luma samples. */
110 for (i
= 0; i
< 16; i
++) {
111 uint16_t *tmp_y
= (uint16_t *)(dest_y
+ i
* linesize
);
112 for (j
= 0; j
< 16; j
++)
113 tmp_y
[j
] = get_bits(&gb
, bit_depth
);
/* Chroma is written unless gray-only decoding is requested. */
115 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: fill both chroma planes with the mid-level
 * value 1 << (bit_depth - 1). */
116 if (!h
->sps
.chroma_format_idc
) {
117 for (i
= 0; i
< block_h
; i
++) {
118 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
119 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
120 for (j
= 0; j
< 8; j
++) {
121 tmp_cb
[j
] = tmp_cr
[j
] = 1 << (bit_depth
- 1);
/* Otherwise read block_h rows of 8 Cb samples, then block_h rows of 8 Cr
 * samples, continuing from the same bit reader. */
125 for (i
= 0; i
< block_h
; i
++) {
126 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
127 for (j
= 0; j
< 8; j
++)
128 tmp_cb
[j
] = get_bits(&gb
, bit_depth
);
130 for (i
= 0; i
< block_h
; i
++) {
131 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
132 for (j
= 0; j
< 8; j
++)
133 tmp_cr
[j
] = get_bits(&gb
, bit_depth
);
/* Byte-per-sample path: copy 16 rows of 16 luma bytes straight from the
 * PCM buffer. */
138 for (i
= 0; i
< 16; i
++)
139 memcpy(dest_y
+ i
* linesize
, h
->intra_pcm_ptr
+ i
* 16, 16);
140 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: fill 8 rows of 8 bytes in each chroma plane
 * with 1 << (bit_depth - 1). */
141 if (!h
->sps
.chroma_format_idc
) {
142 for (i
= 0; i
< 8; i
++) {
143 memset(dest_cb
+ i
* uvlinesize
, 1 << (bit_depth
- 1), 8);
144 memset(dest_cr
+ i
* uvlinesize
, 1 << (bit_depth
- 1), 8);
/* Otherwise Cb starts after the 256 luma bytes and Cr follows block_h * 8
 * bytes of Cb; both are copied row by row, 8 bytes per row. */
147 const uint8_t *src_cb
= h
->intra_pcm_ptr
+ 256;
148 const uint8_t *src_cr
= h
->intra_pcm_ptr
+ 256 + block_h
* 8;
149 for (i
= 0; i
< block_h
; i
++) {
150 memcpy(dest_cb
+ i
* uvlinesize
, src_cb
+ i
* 8, 8);
151 memcpy(dest_cr
+ i
* uvlinesize
, src_cr
+ i
* 8, 8);
/* Intra macroblock: when the deblocking filter is enabled,
 * xchg_mb_border() is called before prediction (flag 1) and again
 * afterwards (flag 0). */
157 if (IS_INTRA(mb_type
)) {
158 if (h
->deblocking_filter
)
159 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
,
160 uvlinesize
, 1, 0, SIMPLE
, PIXEL_SHIFT
);
/* Chroma prediction for Cb and Cr with the same mode, skipped in
 * gray-only decoding. */
162 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
163 h
->hpc
.pred8x8
[h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
164 h
->hpc
.pred8x8
[h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
/* Luma prediction for plane 0. */
167 hl_decode_mb_predict_luma(h
, mb_type
, is_h264
, SIMPLE
,
168 transform_bypass
, PIXEL_SHIFT
,
169 block_offset
, linesize
, dest_y
, 0);
171 if (h
->deblocking_filter
)
172 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
,
173 uvlinesize
, 0, 0, SIMPLE
, PIXEL_SHIFT
);
/* Non-intra macroblock in H.264: motion compensation through either the
 * 4:2:2 or the 4:2:0 helper, with identical arguments.
 * NOTE(review): the condition choosing between the two calls is not
 * visible here - presumably chroma422; confirm. */
174 } else if (is_h264
) {
176 FUNC(hl_motion_422
)(h
, dest_y
, dest_cb
, dest_cr
,
177 h
->qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
178 h
->qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
179 h
->h264dsp
.weight_h264_pixels_tab
,
180 h
->h264dsp
.biweight_h264_pixels_tab
);
182 FUNC(hl_motion_420
)(h
, dest_y
, dest_cb
, dest_cr
,
183 h
->qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
184 h
->qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
185 h
->h264dsp
.weight_h264_pixels_tab
,
186 h
->h264dsp
.biweight_h264_pixels_tab
);
/* Luma residual step for plane 0, delegated to hl_decode_mb_idct_luma(). */
190 hl_decode_mb_idct_luma(h
, mb_type
, is_h264
, SIMPLE
, transform_bypass
,
191 PIXEL_SHIFT
, block_offset
, linesize
, dest_y
, 0);
/* Chroma residual, skipped in gray-only decoding.
 * NOTE(review): the second operand of this && is not visible here. */
193 if ((SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) &&
195 uint8_t *dest
[2] = { dest_cb
, dest_cr
};
/* Transform bypass: for intra macroblocks with profile_idc 244 and
 * vertical or horizontal chroma prediction, pred8x8_add is applied per
 * plane to the coefficients at 16*16*1 and 16*16*2 (scaled by
 * PIXEL_SHIFT). */
196 if (transform_bypass
) {
197 if (IS_INTRA(mb_type
) && h
->sps
.profile_idc
== 244 &&
198 (h
->chroma_pred_mode
== VERT_PRED8x8
||
199 h
->chroma_pred_mode
== HOR_PRED8x8
)) {
200 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[0],
202 h
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
204 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[1],
206 h
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
/* Other bypass case: add 4x4 blocks one by one with
 * h264_add_pixels4_clear, but only blocks that have a non-zero count or
 * a non-zero first coefficient. j = 1 is Cb, j = 2 is Cr. */
209 idct_add
= h
->h264dsp
.h264_add_pixels4_clear
;
210 for (j
= 1; j
< 3; j
++) {
211 for (i
= j
* 16; i
< j
* 16 + 4; i
++)
212 if (h
->non_zero_count_cache
[scan8
[i
]] ||
213 dctcoef_get(h
->mb
, PIXEL_SHIFT
, i
* 16))
214 idct_add(dest
[j
- 1] + block_offset
[i
],
215 h
->mb
+ (i
* 16 << PIXEL_SHIFT
),
/* Second group of four blocks per plane; scan8 and block_offset are
 * indexed with i + 4 while the coefficients use i.
 * NOTE(review): presumably only executed for 4:2:2 content; the guarding
 * condition is not visible here. */
218 for (i
= j
* 16 + 4; i
< j
* 16 + 8; i
++)
219 if (h
->non_zero_count_cache
[scan8
[i
+ 4]] ||
220 dctcoef_get(h
->mb
, PIXEL_SHIFT
, i
* 16))
221 idct_add(dest
[j
- 1] + block_offset
[i
+ 4],
222 h
->mb
+ (i
* 16 << PIXEL_SHIFT
),
/* Chroma QP selection: chroma_qp + 3 in one case, plain chroma_qp in the
 * other.
 * NOTE(review): the selecting condition is not visible here - presumably
 * the 4:2:2 check; confirm. */
231 qp
[0] = h
->chroma_qp
[0] + 3;
232 qp
[1] = h
->chroma_qp
[1] + 3;
234 qp
[0] = h
->chroma_qp
[0];
235 qp
[1] = h
->chroma_qp
[1];
/* Dequantize and inverse-transform each chroma DC block that has non-zero
 * coefficients; the dequant table index depends on intra vs. non-intra
 * (1/4 for Cb, 2/5 for Cr). */
237 if (h
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 0]])
238 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
239 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1 : 4][qp
[0]][0]);
240 if (h
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 1]])
241 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
242 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2 : 5][qp
[1]][0]);
/* Add the chroma blocks of both planes in one call. */
243 h
->h264dsp
.h264_idct_add8(dest
, block_offset
,
245 h
->non_zero_count_cache
);
/* SVQ3 path, compiled in only when CONFIG_SVQ3_DECODER is set: chroma DC
 * is always processed (no non-zero check) and offsets into h->mb are not
 * scaled by PIXEL_SHIFT. */
246 } else if (CONFIG_SVQ3_DECODER
) {
247 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16 * 16 * 1,
248 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1 : 4][h
->chroma_qp
[0]][0]);
249 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16 * 16 * 2,
250 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2 : 5][h
->chroma_qp
[1]][0]);
/* Four blocks per chroma plane, each added through ff_svq3_add_idct_c()
 * when it has a non-zero count or a non-zero first coefficient. */
251 for (j
= 1; j
< 3; j
++) {
252 for (i
= j
* 16; i
< j
* 16 + 4; i
++)
253 if (h
->non_zero_count_cache
[scan8
[i
]] || h
->mb
[i
* 16]) {
254 uint8_t *const ptr
= dest
[j
- 1] + block_offset
[i
];
255 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16,
257 ff_h264_chroma_qp
[0][h
->qscale
+ 12] - 12, 2);
/* What follows is compiled only for the non-SIMPLE flavour or for the
 * 8-bit SIMPLE flavour.
 * NOTE(review): presumably this guards the 4:4:4 decoder defined below and
 * the include instantiates its motion-compensation helper; the matching
 * #endif and the defines preceding the include are not visible here. */
266 #if !SIMPLE || BITS == 8
270 #include "h264_mc_template.c"
272 static av_noinline
void FUNC(hl_decode_mb_444
)(H264Context
*h
)
274 const int mb_x
= h
->mb_x
;
275 const int mb_y
= h
->mb_y
;
276 const int mb_xy
= h
->mb_xy
;
277 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
281 int *block_offset
= &h
->block_offset
[0];
282 const int transform_bypass
= !SIMPLE
&& (h
->qscale
== 0 && h
->sps
.transform_bypass
);
283 const int plane_count
= (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) ? 3 : 1;
285 for (p
= 0; p
< plane_count
; p
++) {
286 dest
[p
] = h
->cur_pic
.f
.data
[p
] +
287 ((mb_x
<< PIXEL_SHIFT
) + mb_y
* h
->linesize
) * 16;
288 h
->vdsp
.prefetch(dest
[p
] + (h
->mb_x
& 3) * 4 * h
->linesize
+ (64 << PIXEL_SHIFT
),
292 h
->list_counts
[mb_xy
] = h
->list_count
;
294 if (!SIMPLE
&& MB_FIELD(h
)) {
295 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= h
->linesize
* 2;
296 block_offset
= &h
->block_offset
[48];
297 if (mb_y
& 1) // FIXME move out of this function?
298 for (p
= 0; p
< 3; p
++)
299 dest
[p
] -= h
->linesize
* 15;
300 if (FRAME_MBAFF(h
)) {
302 for (list
= 0; list
< h
->list_count
; list
++) {
303 if (!USES_LIST(mb_type
, list
))
305 if (IS_16X16(mb_type
)) {
306 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
307 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (h
->mb_y
& 1), 1);
309 for (i
= 0; i
< 16; i
+= 4) {
310 int ref
= h
->ref_cache
[list
][scan8
[i
]];
312 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2,
313 8, (16 + ref
) ^ (h
->mb_y
& 1), 1);
319 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= h
->linesize
;
322 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
324 const int bit_depth
= h
->sps
.bit_depth_luma
;
326 init_get_bits(&gb
, h
->intra_pcm_ptr
, 768 * bit_depth
);
328 for (p
= 0; p
< plane_count
; p
++)
329 for (i
= 0; i
< 16; i
++) {
330 uint16_t *tmp
= (uint16_t *)(dest
[p
] + i
* linesize
);
331 for (j
= 0; j
< 16; j
++)
332 tmp
[j
] = get_bits(&gb
, bit_depth
);
335 for (p
= 0; p
< plane_count
; p
++)
336 for (i
= 0; i
< 16; i
++)
337 memcpy(dest
[p
] + i
* linesize
,
338 h
->intra_pcm_ptr
+ p
* 256 + i
* 16, 16);
341 if (IS_INTRA(mb_type
)) {
342 if (h
->deblocking_filter
)
343 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
,
344 linesize
, 1, 1, SIMPLE
, PIXEL_SHIFT
);
346 for (p
= 0; p
< plane_count
; p
++)
347 hl_decode_mb_predict_luma(h
, mb_type
, 1, SIMPLE
,
348 transform_bypass
, PIXEL_SHIFT
,
349 block_offset
, linesize
, dest
[p
], p
);
351 if (h
->deblocking_filter
)
352 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
,
353 linesize
, 0, 1, SIMPLE
, PIXEL_SHIFT
);
355 FUNC(hl_motion_444
)(h
, dest
[0], dest
[1], dest
[2],
356 h
->qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
357 h
->qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
358 h
->h264dsp
.weight_h264_pixels_tab
,
359 h
->h264dsp
.biweight_h264_pixels_tab
);
362 for (p
= 0; p
< plane_count
; p
++)
363 hl_decode_mb_idct_luma(h
, mb_type
, 1, SIMPLE
, transform_bypass
,
364 PIXEL_SHIFT
, block_offset
, linesize
,