Commit | Line | Data |
---|---|---|
f6fa7814 DM |
1 | /* |
2 | * VC-1 and WMV3 decoder | |
3 | * Copyright (c) 2011 Mashiat Sarker Shakkhar | |
4 | * Copyright (c) 2006-2007 Konstantin Shishkov | |
5 | * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | /** | |
25 | * @file | |
26 | * VC-1 and WMV3 loopfilter | |
27 | */ | |
28 | ||
29 | #include "avcodec.h" | |
30 | #include "mpegvideo.h" | |
31 | #include "vc1.h" | |
32 | #include "vc1dsp.h" | |
33 | ||
34 | void ff_vc1_loop_filter_iblk(VC1Context *v, int pq) | |
35 | { | |
36 | MpegEncContext *s = &v->s; | |
37 | int j; | |
38 | if (!s->first_slice_line) { | |
39 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); | |
40 | if (s->mb_x) | |
41 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); | |
42 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); | |
43 | for (j = 0; j < 2; j++) { | |
44 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq); | |
45 | if (s->mb_x) | |
46 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); | |
47 | } | |
48 | } | |
49 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq); | |
50 | ||
51 | if (s->mb_y == s->end_mb_y - 1) { | |
52 | if (s->mb_x) { | |
53 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); | |
54 | v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); | |
55 | v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); | |
56 | } | |
57 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); | |
58 | } | |
59 | } | |
60 | ||
61 | void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq) | |
62 | { | |
63 | MpegEncContext *s = &v->s; | |
64 | int j; | |
65 | ||
66 | /* The loopfilter runs 1 row and 1 column behind the overlap filter, which | |
67 | * means it runs two rows/cols behind the decoding loop. */ | |
68 | if (!s->first_slice_line) { | |
69 | if (s->mb_x) { | |
70 | if (s->mb_y >= s->start_mb_y + 2) { | |
71 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); | |
72 | ||
73 | if (s->mb_x >= 2) | |
74 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq); | |
75 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq); | |
76 | for (j = 0; j < 2; j++) { | |
77 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); | |
78 | if (s->mb_x >= 2) { | |
79 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq); | |
80 | } | |
81 | } | |
82 | } | |
83 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq); | |
84 | } | |
85 | ||
86 | if (s->mb_x == s->mb_width - 1) { | |
87 | if (s->mb_y >= s->start_mb_y + 2) { | |
88 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); | |
89 | ||
90 | if (s->mb_x) | |
91 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq); | |
92 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq); | |
93 | for (j = 0; j < 2; j++) { | |
94 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); | |
95 | if (s->mb_x >= 2) { | |
96 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq); | |
97 | } | |
98 | } | |
99 | } | |
100 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq); | |
101 | } | |
102 | ||
103 | if (s->mb_y == s->end_mb_y) { | |
104 | if (s->mb_x) { | |
105 | if (s->mb_x >= 2) | |
106 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); | |
107 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq); | |
108 | if (s->mb_x >= 2) { | |
109 | for (j = 0; j < 2; j++) { | |
110 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); | |
111 | } | |
112 | } | |
113 | } | |
114 | ||
115 | if (s->mb_x == s->mb_width - 1) { | |
116 | if (s->mb_x) | |
117 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); | |
118 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); | |
119 | if (s->mb_x) { | |
120 | for (j = 0; j < 2; j++) { | |
121 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); | |
122 | } | |
123 | } | |
124 | } | |
125 | } | |
126 | } | |
127 | } | |
128 | ||
129 | void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v) | |
130 | { | |
131 | MpegEncContext *s = &v->s; | |
132 | int mb_pos; | |
133 | ||
134 | if (v->condover == CONDOVER_NONE) | |
135 | return; | |
136 | ||
137 | mb_pos = s->mb_x + s->mb_y * s->mb_stride; | |
138 | ||
139 | /* Within a MB, the horizontal overlap always runs before the vertical. | |
140 | * To accomplish that, we run the H on left and internal borders of the | |
141 | * currently decoded MB. Then, we wait for the next overlap iteration | |
142 | * to do H overlap on the right edge of this MB, before moving over and | |
143 | * running the V overlap. Therefore, the V overlap makes us trail by one | |
144 | * MB col and the H overlap filter makes us trail by one MB row. This | |
145 | * is reflected in the time at which we run the put_pixels loop. */ | |
146 | if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) { | |
147 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 || | |
148 | v->over_flags_plane[mb_pos - 1])) { | |
149 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1], | |
150 | v->block[v->cur_blk_idx][0]); | |
151 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3], | |
152 | v->block[v->cur_blk_idx][2]); | |
153 | if (!(s->flags & CODEC_FLAG_GRAY)) { | |
154 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4], | |
155 | v->block[v->cur_blk_idx][4]); | |
156 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5], | |
157 | v->block[v->cur_blk_idx][5]); | |
158 | } | |
159 | } | |
160 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0], | |
161 | v->block[v->cur_blk_idx][1]); | |
162 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2], | |
163 | v->block[v->cur_blk_idx][3]); | |
164 | ||
165 | if (s->mb_x == s->mb_width - 1) { | |
166 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || | |
167 | v->over_flags_plane[mb_pos - s->mb_stride])) { | |
168 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2], | |
169 | v->block[v->cur_blk_idx][0]); | |
170 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3], | |
171 | v->block[v->cur_blk_idx][1]); | |
172 | if (!(s->flags & CODEC_FLAG_GRAY)) { | |
173 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4], | |
174 | v->block[v->cur_blk_idx][4]); | |
175 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5], | |
176 | v->block[v->cur_blk_idx][5]); | |
177 | } | |
178 | } | |
179 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0], | |
180 | v->block[v->cur_blk_idx][2]); | |
181 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1], | |
182 | v->block[v->cur_blk_idx][3]); | |
183 | } | |
184 | } | |
185 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) { | |
186 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || | |
187 | v->over_flags_plane[mb_pos - s->mb_stride - 1])) { | |
188 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2], | |
189 | v->block[v->left_blk_idx][0]); | |
190 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3], | |
191 | v->block[v->left_blk_idx][1]); | |
192 | if (!(s->flags & CODEC_FLAG_GRAY)) { | |
193 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4], | |
194 | v->block[v->left_blk_idx][4]); | |
195 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5], | |
196 | v->block[v->left_blk_idx][5]); | |
197 | } | |
198 | } | |
199 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0], | |
200 | v->block[v->left_blk_idx][2]); | |
201 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1], | |
202 | v->block[v->left_blk_idx][3]); | |
203 | } | |
204 | } | |
205 | ||
206 | static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num) | |
207 | { | |
208 | MpegEncContext *s = &v->s; | |
209 | int mb_cbp = v->cbp[s->mb_x - s->mb_stride], | |
210 | block_cbp = mb_cbp >> (block_num * 4), bottom_cbp, | |
211 | mb_is_intra = v->is_intra[s->mb_x - s->mb_stride], | |
212 | block_is_intra = mb_is_intra >> block_num, bottom_is_intra; | |
213 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; | |
214 | uint8_t *dst; | |
215 | ||
216 | if (block_num > 3) { | |
217 | dst = s->dest[block_num - 3]; | |
218 | } else { | |
219 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize; | |
220 | } | |
221 | if (s->mb_y != s->end_mb_y || block_num < 2) { | |
222 | int16_t (*mv)[2]; | |
223 | int mv_stride; | |
224 | ||
225 | if (block_num > 3) { | |
226 | bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4); | |
227 | bottom_is_intra = v->is_intra[s->mb_x] >> block_num; | |
228 | mv = &v->luma_mv[s->mb_x - s->mb_stride]; | |
229 | mv_stride = s->mb_stride; | |
230 | } else { | |
231 | bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4)) | |
232 | : (v->cbp[s->mb_x] >> ((block_num - 2) * 4)); | |
233 | bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2)) | |
234 | : (v->is_intra[s->mb_x] >> (block_num - 2)); | |
235 | mv_stride = s->b8_stride; | |
236 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride]; | |
237 | } | |
238 | ||
239 | if (bottom_is_intra & 1 || block_is_intra & 1 || | |
240 | mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) { | |
241 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); | |
242 | } else { | |
243 | idx = ((bottom_cbp >> 2) | block_cbp) & 3; | |
244 | if (idx == 3) { | |
245 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); | |
246 | } else if (idx) { | |
247 | if (idx == 1) | |
248 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); | |
249 | else | |
250 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); | |
251 | } | |
252 | } | |
253 | } | |
254 | ||
255 | dst -= 4 * linesize; | |
256 | ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF; | |
257 | if (ttblk == TT_4X4 || ttblk == TT_8X4) { | |
258 | idx = (block_cbp | (block_cbp >> 2)) & 3; | |
259 | if (idx == 3) { | |
260 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); | |
261 | } else if (idx) { | |
262 | if (idx == 1) | |
263 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); | |
264 | else | |
265 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); | |
266 | } | |
267 | } | |
268 | } | |
269 | ||
270 | static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num) | |
271 | { | |
272 | MpegEncContext *s = &v->s; | |
273 | int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride], | |
274 | block_cbp = mb_cbp >> (block_num * 4), right_cbp, | |
275 | mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride], | |
276 | block_is_intra = mb_is_intra >> block_num, right_is_intra; | |
277 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; | |
278 | uint8_t *dst; | |
279 | ||
280 | if (block_num > 3) { | |
281 | dst = s->dest[block_num - 3] - 8 * linesize; | |
282 | } else { | |
283 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8; | |
284 | } | |
285 | ||
286 | if (s->mb_x != s->mb_width || !(block_num & 5)) { | |
287 | int16_t (*mv)[2]; | |
288 | ||
289 | if (block_num > 3) { | |
290 | right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4); | |
291 | right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num; | |
292 | mv = &v->luma_mv[s->mb_x - s->mb_stride - 1]; | |
293 | } else { | |
294 | right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4)) | |
295 | : (mb_cbp >> ((block_num + 1) * 4)); | |
296 | right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1)) | |
297 | : (mb_is_intra >> (block_num + 1)); | |
298 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2]; | |
299 | } | |
300 | if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) { | |
301 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); | |
302 | } else { | |
303 | idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check | |
304 | if (idx == 5) { | |
305 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); | |
306 | } else if (idx) { | |
307 | if (idx == 1) | |
308 | v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq); | |
309 | else | |
310 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); | |
311 | } | |
312 | } | |
313 | } | |
314 | ||
315 | dst -= 4; | |
316 | ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf; | |
317 | if (ttblk == TT_4X4 || ttblk == TT_4X8) { | |
318 | idx = (block_cbp | (block_cbp >> 1)) & 5; | |
319 | if (idx == 5) { | |
320 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); | |
321 | } else if (idx) { | |
322 | if (idx == 1) | |
323 | v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq); | |
324 | else | |
325 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); | |
326 | } | |
327 | } | |
328 | } | |
329 | ||
330 | void ff_vc1_apply_p_loop_filter(VC1Context *v) | |
331 | { | |
332 | MpegEncContext *s = &v->s; | |
333 | int i; | |
334 | ||
335 | for (i = 0; i < 6; i++) { | |
336 | vc1_apply_p_v_loop_filter(v, i); | |
337 | } | |
338 | ||
339 | /* V always precedes H, therefore we run H one MB before V; | |
340 | * at the end of a row, we catch up to complete the row */ | |
341 | if (s->mb_x) { | |
342 | for (i = 0; i < 6; i++) { | |
343 | vc1_apply_p_h_loop_filter(v, i); | |
344 | } | |
345 | if (s->mb_x == s->mb_width - 1) { | |
346 | s->mb_x++; | |
347 | ff_update_block_index(s); | |
348 | for (i = 0; i < 6; i++) { | |
349 | vc1_apply_p_h_loop_filter(v, i); | |
350 | } | |
351 | } | |
352 | } | |
353 | } |