Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding | |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * H.264 / AVC / MPEG4 part10 direct mb/block decoding. | |
25 | * @author Michael Niedermayer <michaelni@gmx.at> | |
26 | */ | |
27 | ||
28 | #include "internal.h" | |
29 | #include "avcodec.h" | |
30 | #include "h264.h" | |
31 | #include "mpegutils.h" | |
32 | #include "rectangle.h" | |
33 | #include "thread.h" | |
34 | ||
35 | #include <assert.h> | |
36 | ||
37 | static int get_scale_factor(H264Context *const h, int poc, int poc1, int i) | |
38 | { | |
39 | int poc0 = h->ref_list[0][i].poc; | |
40 | int td = av_clip(poc1 - poc0, -128, 127); | |
41 | if (td == 0 || h->ref_list[0][i].long_ref) { | |
42 | return 256; | |
43 | } else { | |
44 | int tb = av_clip(poc - poc0, -128, 127); | |
45 | int tx = (16384 + (FFABS(td) >> 1)) / td; | |
46 | return av_clip((tb * tx + 32) >> 6, -1024, 1023); | |
47 | } | |
48 | } | |
49 | ||
50 | void ff_h264_direct_dist_scale_factor(H264Context *const h) | |
51 | { | |
52 | const int poc = FIELD_PICTURE(h) ? h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD] | |
53 | : h->cur_pic_ptr->poc; | |
54 | const int poc1 = h->ref_list[1][0].poc; | |
55 | int i, field; | |
56 | ||
57 | if (FRAME_MBAFF(h)) | |
58 | for (field = 0; field < 2; field++) { | |
59 | const int poc = h->cur_pic_ptr->field_poc[field]; | |
60 | const int poc1 = h->ref_list[1][0].field_poc[field]; | |
61 | for (i = 0; i < 2 * h->ref_count[0]; i++) | |
62 | h->dist_scale_factor_field[field][i ^ field] = | |
63 | get_scale_factor(h, poc, poc1, i + 16); | |
64 | } | |
65 | ||
66 | for (i = 0; i < h->ref_count[0]; i++) | |
67 | h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i); | |
68 | } | |
69 | ||
70 | static void fill_colmap(H264Context *h, int map[2][16 + 32], int list, | |
71 | int field, int colfield, int mbafi) | |
72 | { | |
73 | H264Picture *const ref1 = &h->ref_list[1][0]; | |
74 | int j, old_ref, rfield; | |
75 | int start = mbafi ? 16 : 0; | |
76 | int end = mbafi ? 16 + 2 * h->ref_count[0] : h->ref_count[0]; | |
77 | int interl = mbafi || h->picture_structure != PICT_FRAME; | |
78 | ||
79 | /* bogus; fills in for missing frames */ | |
80 | memset(map[list], 0, sizeof(map[list])); | |
81 | ||
82 | for (rfield = 0; rfield < 2; rfield++) { | |
83 | for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) { | |
84 | int poc = ref1->ref_poc[colfield][list][old_ref]; | |
85 | ||
86 | if (!interl) | |
87 | poc |= 3; | |
88 | // FIXME: store all MBAFF references so this is not needed | |
89 | else if (interl && (poc & 3) == 3) | |
90 | poc = (poc & ~3) + rfield + 1; | |
91 | ||
92 | for (j = start; j < end; j++) { | |
93 | if (4 * h->ref_list[0][j].frame_num + | |
94 | (h->ref_list[0][j].reference & 3) == poc) { | |
95 | int cur_ref = mbafi ? (j - 16) ^ field : j; | |
96 | if (ref1->mbaff) | |
97 | map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref; | |
98 | if (rfield == field || !interl) | |
99 | map[list][old_ref] = cur_ref; | |
100 | break; | |
101 | } | |
102 | } | |
103 | } | |
104 | } | |
105 | } | |
106 | ||
107 | void ff_h264_direct_ref_list_init(H264Context *const h) | |
108 | { | |
109 | H264Picture *const ref1 = &h->ref_list[1][0]; | |
110 | H264Picture *const cur = h->cur_pic_ptr; | |
111 | int list, j, field; | |
112 | int sidx = (h->picture_structure & 1) ^ 1; | |
113 | int ref1sidx = (ref1->reference & 1) ^ 1; | |
114 | ||
115 | for (list = 0; list < 2; list++) { | |
116 | cur->ref_count[sidx][list] = h->ref_count[list]; | |
117 | for (j = 0; j < h->ref_count[list]; j++) | |
118 | cur->ref_poc[sidx][list][j] = 4 * h->ref_list[list][j].frame_num + | |
119 | (h->ref_list[list][j].reference & 3); | |
120 | } | |
121 | ||
122 | if (h->picture_structure == PICT_FRAME) { | |
123 | memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); | |
124 | memcpy(cur->ref_poc[1], cur->ref_poc[0], sizeof(cur->ref_poc[0])); | |
125 | } | |
126 | ||
127 | cur->mbaff = FRAME_MBAFF(h); | |
128 | ||
129 | h->col_fieldoff = 0; | |
130 | if (h->picture_structure == PICT_FRAME) { | |
131 | int cur_poc = h->cur_pic_ptr->poc; | |
132 | int *col_poc = h->ref_list[1]->field_poc; | |
133 | h->col_parity = (FFABS(col_poc[0] - cur_poc) >= | |
134 | FFABS(col_poc[1] - cur_poc)); | |
135 | ref1sidx = | |
136 | sidx = h->col_parity; | |
137 | // FL -> FL & differ parity | |
138 | } else if (!(h->picture_structure & h->ref_list[1][0].reference) && | |
139 | !h->ref_list[1][0].mbaff) { | |
140 | h->col_fieldoff = 2 * h->ref_list[1][0].reference - 3; | |
141 | } | |
142 | ||
143 | if (h->slice_type_nos != AV_PICTURE_TYPE_B || h->direct_spatial_mv_pred) | |
144 | return; | |
145 | ||
146 | for (list = 0; list < 2; list++) { | |
147 | fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0); | |
148 | if (FRAME_MBAFF(h)) | |
149 | for (field = 0; field < 2; field++) | |
150 | fill_colmap(h, h->map_col_to_list0_field[field], list, field, | |
151 | field, 1); | |
152 | } | |
153 | } | |
154 | ||
155 | static void await_reference_mb_row(H264Context *const h, H264Picture *ref, | |
156 | int mb_y) | |
157 | { | |
158 | int ref_field = ref->reference - 1; | |
159 | int ref_field_picture = ref->field_picture; | |
160 | int ref_height = 16 * h->mb_height >> ref_field_picture; | |
161 | ||
162 | if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME)) | |
163 | return; | |
164 | ||
165 | /* FIXME: It can be safe to access mb stuff | |
166 | * even if pixels aren't deblocked yet. */ | |
167 | ||
168 | ff_thread_await_progress(&ref->tf, | |
169 | FFMIN(16 * mb_y >> ref_field_picture, | |
170 | ref_height - 1), | |
171 | ref_field_picture && ref_field); | |
172 | } | |
173 | ||
174 | static void pred_spatial_direct_motion(H264Context *const h, int *mb_type) | |
175 | { | |
176 | int b8_stride = 2; | |
177 | int b4_stride = h->b_stride; | |
178 | int mb_xy = h->mb_xy, mb_y = h->mb_y; | |
179 | int mb_type_col[2]; | |
180 | const int16_t (*l1mv0)[2], (*l1mv1)[2]; | |
181 | const int8_t *l1ref0, *l1ref1; | |
182 | const int is_b8x8 = IS_8X8(*mb_type); | |
183 | unsigned int sub_mb_type = MB_TYPE_L0L1; | |
184 | int i8, i4; | |
185 | int ref[2]; | |
186 | int mv[2]; | |
187 | int list; | |
188 | ||
189 | assert(h->ref_list[1][0].reference & 3); | |
190 | ||
191 | await_reference_mb_row(h, &h->ref_list[1][0], | |
192 | h->mb_y + !!IS_INTERLACED(*mb_type)); | |
193 | ||
194 | #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \ | |
195 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM) | |
196 | ||
197 | /* ref = min(neighbors) */ | |
198 | for (list = 0; list < 2; list++) { | |
199 | int left_ref = h->ref_cache[list][scan8[0] - 1]; | |
200 | int top_ref = h->ref_cache[list][scan8[0] - 8]; | |
201 | int refc = h->ref_cache[list][scan8[0] - 8 + 4]; | |
202 | const int16_t *C = h->mv_cache[list][scan8[0] - 8 + 4]; | |
203 | if (refc == PART_NOT_AVAILABLE) { | |
204 | refc = h->ref_cache[list][scan8[0] - 8 - 1]; | |
205 | C = h->mv_cache[list][scan8[0] - 8 - 1]; | |
206 | } | |
207 | ref[list] = FFMIN3((unsigned)left_ref, | |
208 | (unsigned)top_ref, | |
209 | (unsigned)refc); | |
210 | if (ref[list] >= 0) { | |
211 | /* This is just pred_motion() but with the cases removed that | |
212 | * cannot happen for direct blocks. */ | |
213 | const int16_t *const A = h->mv_cache[list][scan8[0] - 1]; | |
214 | const int16_t *const B = h->mv_cache[list][scan8[0] - 8]; | |
215 | ||
216 | int match_count = (left_ref == ref[list]) + | |
217 | (top_ref == ref[list]) + | |
218 | (refc == ref[list]); | |
219 | ||
220 | if (match_count > 1) { // most common | |
221 | mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]), | |
222 | mid_pred(A[1], B[1], C[1])); | |
223 | } else { | |
224 | assert(match_count == 1); | |
225 | if (left_ref == ref[list]) | |
226 | mv[list] = AV_RN32A(A); | |
227 | else if (top_ref == ref[list]) | |
228 | mv[list] = AV_RN32A(B); | |
229 | else | |
230 | mv[list] = AV_RN32A(C); | |
231 | } | |
232 | av_assert2(ref[list] < (h->ref_count[list] << !!FRAME_MBAFF(h))); | |
233 | } else { | |
234 | int mask = ~(MB_TYPE_L0 << (2 * list)); | |
235 | mv[list] = 0; | |
236 | ref[list] = -1; | |
237 | if (!is_b8x8) | |
238 | *mb_type &= mask; | |
239 | sub_mb_type &= mask; | |
240 | } | |
241 | } | |
242 | if (ref[0] < 0 && ref[1] < 0) { | |
243 | ref[0] = ref[1] = 0; | |
244 | if (!is_b8x8) | |
245 | *mb_type |= MB_TYPE_L0L1; | |
246 | sub_mb_type |= MB_TYPE_L0L1; | |
247 | } | |
248 | ||
249 | if (!(is_b8x8 | mv[0] | mv[1])) { | |
250 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); | |
251 | fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); | |
252 | fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); | |
253 | fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); | |
254 | *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 | | |
255 | MB_TYPE_P1L0 | MB_TYPE_P1L1)) | | |
256 | MB_TYPE_16x16 | MB_TYPE_DIRECT2; | |
257 | return; | |
258 | } | |
259 | ||
260 | if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL | |
261 | if (!IS_INTERLACED(*mb_type)) { // AFR/FR -> AFL/FL | |
262 | mb_y = (h->mb_y & ~1) + h->col_parity; | |
263 | mb_xy = h->mb_x + | |
264 | ((h->mb_y & ~1) + h->col_parity) * h->mb_stride; | |
265 | b8_stride = 0; | |
266 | } else { | |
267 | mb_y += h->col_fieldoff; | |
268 | mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity | |
269 | } | |
270 | goto single_col; | |
271 | } else { // AFL/AFR/FR/FL -> AFR/FR | |
272 | if (IS_INTERLACED(*mb_type)) { // AFL /FL -> AFR/FR | |
273 | mb_y = h->mb_y & ~1; | |
274 | mb_xy = (h->mb_y & ~1) * h->mb_stride + h->mb_x; | |
275 | mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy]; | |
276 | mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride]; | |
277 | b8_stride = 2 + 4 * h->mb_stride; | |
278 | b4_stride *= 6; | |
279 | if (IS_INTERLACED(mb_type_col[0]) != | |
280 | IS_INTERLACED(mb_type_col[1])) { | |
281 | mb_type_col[0] &= ~MB_TYPE_INTERLACED; | |
282 | mb_type_col[1] &= ~MB_TYPE_INTERLACED; | |
283 | } | |
284 | ||
285 | sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */ | |
286 | if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) && | |
287 | (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) && | |
288 | !is_b8x8) { | |
289 | *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2; /* B_16x8 */ | |
290 | } else { | |
291 | *mb_type |= MB_TYPE_8x8; | |
292 | } | |
293 | } else { // AFR/FR -> AFR/FR | |
294 | single_col: | |
295 | mb_type_col[0] = | |
296 | mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy]; | |
297 | ||
298 | sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */ | |
299 | if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) { | |
300 | *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */ | |
301 | } else if (!is_b8x8 && | |
302 | (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) { | |
303 | *mb_type |= MB_TYPE_DIRECT2 | | |
304 | (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16)); | |
305 | } else { | |
306 | if (!h->sps.direct_8x8_inference_flag) { | |
307 | /* FIXME: Save sub mb types from previous frames (or derive | |
308 | * from MVs) so we know exactly what block size to use. */ | |
309 | sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */ | |
310 | } | |
311 | *mb_type |= MB_TYPE_8x8; | |
312 | } | |
313 | } | |
314 | } | |
315 | ||
316 | await_reference_mb_row(h, &h->ref_list[1][0], mb_y); | |
317 | ||
318 | l1mv0 = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]]; | |
319 | l1mv1 = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]]; | |
320 | l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy]; | |
321 | l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy]; | |
322 | if (!b8_stride) { | |
323 | if (h->mb_y & 1) { | |
324 | l1ref0 += 2; | |
325 | l1ref1 += 2; | |
326 | l1mv0 += 2 * b4_stride; | |
327 | l1mv1 += 2 * b4_stride; | |
328 | } | |
329 | } | |
330 | ||
331 | if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) { | |
332 | int n = 0; | |
333 | for (i8 = 0; i8 < 4; i8++) { | |
334 | int x8 = i8 & 1; | |
335 | int y8 = i8 >> 1; | |
336 | int xy8 = x8 + y8 * b8_stride; | |
337 | int xy4 = x8 * 3 + y8 * b4_stride; | |
338 | int a, b; | |
339 | ||
340 | if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) | |
341 | continue; | |
342 | h->sub_mb_type[i8] = sub_mb_type; | |
343 | ||
344 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
345 | (uint8_t)ref[0], 1); | |
346 | fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, | |
347 | (uint8_t)ref[1], 1); | |
348 | if (!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref && | |
349 | ((l1ref0[xy8] == 0 && | |
350 | FFABS(l1mv0[xy4][0]) <= 1 && | |
351 | FFABS(l1mv0[xy4][1]) <= 1) || | |
352 | (l1ref0[xy8] < 0 && | |
353 | l1ref1[xy8] == 0 && | |
354 | FFABS(l1mv1[xy4][0]) <= 1 && | |
355 | FFABS(l1mv1[xy4][1]) <= 1))) { | |
356 | a = | |
357 | b = 0; | |
358 | if (ref[0] > 0) | |
359 | a = mv[0]; | |
360 | if (ref[1] > 0) | |
361 | b = mv[1]; | |
362 | n++; | |
363 | } else { | |
364 | a = mv[0]; | |
365 | b = mv[1]; | |
366 | } | |
367 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4); | |
368 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4); | |
369 | } | |
370 | if (!is_b8x8 && !(n & 3)) | |
371 | *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 | | |
372 | MB_TYPE_P1L0 | MB_TYPE_P1L1)) | | |
373 | MB_TYPE_16x16 | MB_TYPE_DIRECT2; | |
374 | } else if (IS_16X16(*mb_type)) { | |
375 | int a, b; | |
376 | ||
377 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); | |
378 | fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); | |
379 | if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && | |
380 | ((l1ref0[0] == 0 && | |
381 | FFABS(l1mv0[0][0]) <= 1 && | |
382 | FFABS(l1mv0[0][1]) <= 1) || | |
383 | (l1ref0[0] < 0 && !l1ref1[0] && | |
384 | FFABS(l1mv1[0][0]) <= 1 && | |
385 | FFABS(l1mv1[0][1]) <= 1 && | |
386 | h->x264_build > 33U))) { | |
387 | a = b = 0; | |
388 | if (ref[0] > 0) | |
389 | a = mv[0]; | |
390 | if (ref[1] > 0) | |
391 | b = mv[1]; | |
392 | } else { | |
393 | a = mv[0]; | |
394 | b = mv[1]; | |
395 | } | |
396 | fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); | |
397 | fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); | |
398 | } else { | |
399 | int n = 0; | |
400 | for (i8 = 0; i8 < 4; i8++) { | |
401 | const int x8 = i8 & 1; | |
402 | const int y8 = i8 >> 1; | |
403 | ||
404 | if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) | |
405 | continue; | |
406 | h->sub_mb_type[i8] = sub_mb_type; | |
407 | ||
408 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4); | |
409 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4); | |
410 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
411 | (uint8_t)ref[0], 1); | |
412 | fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, | |
413 | (uint8_t)ref[1], 1); | |
414 | ||
415 | assert(b8_stride == 2); | |
416 | /* col_zero_flag */ | |
417 | if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && | |
418 | (l1ref0[i8] == 0 || | |
419 | (l1ref0[i8] < 0 && | |
420 | l1ref1[i8] == 0 && | |
421 | h->x264_build > 33U))) { | |
422 | const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1; | |
423 | if (IS_SUB_8X8(sub_mb_type)) { | |
424 | const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride]; | |
425 | if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) { | |
426 | if (ref[0] == 0) | |
427 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, | |
428 | 8, 0, 4); | |
429 | if (ref[1] == 0) | |
430 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, | |
431 | 8, 0, 4); | |
432 | n += 4; | |
433 | } | |
434 | } else { | |
435 | int m = 0; | |
436 | for (i4 = 0; i4 < 4; i4++) { | |
437 | const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) + | |
438 | (y8 * 2 + (i4 >> 1)) * b4_stride]; | |
439 | if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) { | |
440 | if (ref[0] == 0) | |
441 | AV_ZERO32(h->mv_cache[0][scan8[i8 * 4 + i4]]); | |
442 | if (ref[1] == 0) | |
443 | AV_ZERO32(h->mv_cache[1][scan8[i8 * 4 + i4]]); | |
444 | m++; | |
445 | } | |
446 | } | |
447 | if (!(m & 3)) | |
448 | h->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8; | |
449 | n += m; | |
450 | } | |
451 | } | |
452 | } | |
453 | if (!is_b8x8 && !(n & 15)) | |
454 | *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 | | |
455 | MB_TYPE_P1L0 | MB_TYPE_P1L1)) | | |
456 | MB_TYPE_16x16 | MB_TYPE_DIRECT2; | |
457 | } | |
458 | } | |
459 | ||
460 | static void pred_temp_direct_motion(H264Context *const h, int *mb_type) | |
461 | { | |
462 | int b8_stride = 2; | |
463 | int b4_stride = h->b_stride; | |
464 | int mb_xy = h->mb_xy, mb_y = h->mb_y; | |
465 | int mb_type_col[2]; | |
466 | const int16_t (*l1mv0)[2], (*l1mv1)[2]; | |
467 | const int8_t *l1ref0, *l1ref1; | |
468 | const int is_b8x8 = IS_8X8(*mb_type); | |
469 | unsigned int sub_mb_type; | |
470 | int i8, i4; | |
471 | ||
472 | assert(h->ref_list[1][0].reference & 3); | |
473 | ||
474 | await_reference_mb_row(h, &h->ref_list[1][0], | |
475 | h->mb_y + !!IS_INTERLACED(*mb_type)); | |
476 | ||
477 | if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL | |
478 | if (!IS_INTERLACED(*mb_type)) { // AFR/FR -> AFL/FL | |
479 | mb_y = (h->mb_y & ~1) + h->col_parity; | |
480 | mb_xy = h->mb_x + | |
481 | ((h->mb_y & ~1) + h->col_parity) * h->mb_stride; | |
482 | b8_stride = 0; | |
483 | } else { | |
484 | mb_y += h->col_fieldoff; | |
485 | mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity | |
486 | } | |
487 | goto single_col; | |
488 | } else { // AFL/AFR/FR/FL -> AFR/FR | |
489 | if (IS_INTERLACED(*mb_type)) { // AFL /FL -> AFR/FR | |
490 | mb_y = h->mb_y & ~1; | |
491 | mb_xy = h->mb_x + (h->mb_y & ~1) * h->mb_stride; | |
492 | mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy]; | |
493 | mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride]; | |
494 | b8_stride = 2 + 4 * h->mb_stride; | |
495 | b4_stride *= 6; | |
496 | if (IS_INTERLACED(mb_type_col[0]) != | |
497 | IS_INTERLACED(mb_type_col[1])) { | |
498 | mb_type_col[0] &= ~MB_TYPE_INTERLACED; | |
499 | mb_type_col[1] &= ~MB_TYPE_INTERLACED; | |
500 | } | |
501 | ||
502 | sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | | |
503 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */ | |
504 | ||
505 | if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) && | |
506 | (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) && | |
507 | !is_b8x8) { | |
508 | *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 | | |
509 | MB_TYPE_DIRECT2; /* B_16x8 */ | |
510 | } else { | |
511 | *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1; | |
512 | } | |
513 | } else { // AFR/FR -> AFR/FR | |
514 | single_col: | |
515 | mb_type_col[0] = | |
516 | mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy]; | |
517 | ||
518 | sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | | |
519 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */ | |
520 | if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) { | |
521 | *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | | |
522 | MB_TYPE_DIRECT2; /* B_16x16 */ | |
523 | } else if (!is_b8x8 && | |
524 | (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) { | |
525 | *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | | |
526 | (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16)); | |
527 | } else { | |
528 | if (!h->sps.direct_8x8_inference_flag) { | |
529 | /* FIXME: save sub mb types from previous frames (or derive | |
530 | * from MVs) so we know exactly what block size to use */ | |
531 | sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | | |
532 | MB_TYPE_DIRECT2; /* B_SUB_4x4 */ | |
533 | } | |
534 | *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1; | |
535 | } | |
536 | } | |
537 | } | |
538 | ||
539 | await_reference_mb_row(h, &h->ref_list[1][0], mb_y); | |
540 | ||
541 | l1mv0 = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]]; | |
542 | l1mv1 = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]]; | |
543 | l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy]; | |
544 | l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy]; | |
545 | if (!b8_stride) { | |
546 | if (h->mb_y & 1) { | |
547 | l1ref0 += 2; | |
548 | l1ref1 += 2; | |
549 | l1mv0 += 2 * b4_stride; | |
550 | l1mv1 += 2 * b4_stride; | |
551 | } | |
552 | } | |
553 | ||
554 | { | |
555 | const int *map_col_to_list0[2] = { h->map_col_to_list0[0], | |
556 | h->map_col_to_list0[1] }; | |
557 | const int *dist_scale_factor = h->dist_scale_factor; | |
558 | int ref_offset; | |
559 | ||
560 | if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) { | |
561 | map_col_to_list0[0] = h->map_col_to_list0_field[h->mb_y & 1][0]; | |
562 | map_col_to_list0[1] = h->map_col_to_list0_field[h->mb_y & 1][1]; | |
563 | dist_scale_factor = h->dist_scale_factor_field[h->mb_y & 1]; | |
564 | } | |
565 | ref_offset = (h->ref_list[1][0].mbaff << 4) & (mb_type_col[0] >> 3); | |
566 | ||
567 | if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) { | |
568 | int y_shift = 2 * !IS_INTERLACED(*mb_type); | |
569 | assert(h->sps.direct_8x8_inference_flag); | |
570 | ||
571 | for (i8 = 0; i8 < 4; i8++) { | |
572 | const int x8 = i8 & 1; | |
573 | const int y8 = i8 >> 1; | |
574 | int ref0, scale; | |
575 | const int16_t (*l1mv)[2] = l1mv0; | |
576 | ||
577 | if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) | |
578 | continue; | |
579 | h->sub_mb_type[i8] = sub_mb_type; | |
580 | ||
581 | fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1); | |
582 | if (IS_INTRA(mb_type_col[y8])) { | |
583 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1); | |
584 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4); | |
585 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4); | |
586 | continue; | |
587 | } | |
588 | ||
589 | ref0 = l1ref0[x8 + y8 * b8_stride]; | |
590 | if (ref0 >= 0) | |
591 | ref0 = map_col_to_list0[0][ref0 + ref_offset]; | |
592 | else { | |
593 | ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] + | |
594 | ref_offset]; | |
595 | l1mv = l1mv1; | |
596 | } | |
597 | scale = dist_scale_factor[ref0]; | |
598 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
599 | ref0, 1); | |
600 | ||
601 | { | |
602 | const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride]; | |
603 | int my_col = (mv_col[1] << y_shift) / 2; | |
604 | int mx = (scale * mv_col[0] + 128) >> 8; | |
605 | int my = (scale * my_col + 128) >> 8; | |
606 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
607 | pack16to32(mx, my), 4); | |
608 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, | |
609 | pack16to32(mx - mv_col[0], my - my_col), 4); | |
610 | } | |
611 | } | |
612 | return; | |
613 | } | |
614 | ||
615 | /* one-to-one mv scaling */ | |
616 | ||
617 | if (IS_16X16(*mb_type)) { | |
618 | int ref, mv0, mv1; | |
619 | ||
620 | fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); | |
621 | if (IS_INTRA(mb_type_col[0])) { | |
622 | ref = mv0 = mv1 = 0; | |
623 | } else { | |
624 | const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset] | |
625 | : map_col_to_list0[1][l1ref1[0] + ref_offset]; | |
626 | const int scale = dist_scale_factor[ref0]; | |
627 | const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0]; | |
628 | int mv_l0[2]; | |
629 | mv_l0[0] = (scale * mv_col[0] + 128) >> 8; | |
630 | mv_l0[1] = (scale * mv_col[1] + 128) >> 8; | |
631 | ref = ref0; | |
632 | mv0 = pack16to32(mv_l0[0], mv_l0[1]); | |
633 | mv1 = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]); | |
634 | } | |
635 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); | |
636 | fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4); | |
637 | fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4); | |
638 | } else { | |
639 | for (i8 = 0; i8 < 4; i8++) { | |
640 | const int x8 = i8 & 1; | |
641 | const int y8 = i8 >> 1; | |
642 | int ref0, scale; | |
643 | const int16_t (*l1mv)[2] = l1mv0; | |
644 | ||
645 | if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) | |
646 | continue; | |
647 | h->sub_mb_type[i8] = sub_mb_type; | |
648 | fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1); | |
649 | if (IS_INTRA(mb_type_col[0])) { | |
650 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1); | |
651 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4); | |
652 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4); | |
653 | continue; | |
654 | } | |
655 | ||
656 | assert(b8_stride == 2); | |
657 | ref0 = l1ref0[i8]; | |
658 | if (ref0 >= 0) | |
659 | ref0 = map_col_to_list0[0][ref0 + ref_offset]; | |
660 | else { | |
661 | ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset]; | |
662 | l1mv = l1mv1; | |
663 | } | |
664 | scale = dist_scale_factor[ref0]; | |
665 | ||
666 | fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
667 | ref0, 1); | |
668 | if (IS_SUB_8X8(sub_mb_type)) { | |
669 | const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride]; | |
670 | int mx = (scale * mv_col[0] + 128) >> 8; | |
671 | int my = (scale * mv_col[1] + 128) >> 8; | |
672 | fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, | |
673 | pack16to32(mx, my), 4); | |
674 | fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, | |
675 | pack16to32(mx - mv_col[0], my - mv_col[1]), 4); | |
676 | } else { | |
677 | for (i4 = 0; i4 < 4; i4++) { | |
678 | const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) + | |
679 | (y8 * 2 + (i4 >> 1)) * b4_stride]; | |
680 | int16_t *mv_l0 = h->mv_cache[0][scan8[i8 * 4 + i4]]; | |
681 | mv_l0[0] = (scale * mv_col[0] + 128) >> 8; | |
682 | mv_l0[1] = (scale * mv_col[1] + 128) >> 8; | |
683 | AV_WN32A(h->mv_cache[1][scan8[i8 * 4 + i4]], | |
684 | pack16to32(mv_l0[0] - mv_col[0], | |
685 | mv_l0[1] - mv_col[1])); | |
686 | } | |
687 | } | |
688 | } | |
689 | } | |
690 | } | |
691 | } | |
692 | ||
693 | void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type) | |
694 | { | |
695 | if (h->direct_spatial_mv_pred) | |
696 | pred_spatial_direct_motion(h, mb_type); | |
697 | else | |
698 | pred_temp_direct_motion(h, mb_type); | |
699 | } |