/*
 * VC-1 and WMV3 decoder
 * Copyright (c) 2011 Mashiat Sarker Shakkhar
 * Copyright (c) 2006-2007 Konstantin Shishkov
 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 23 | |
/**
 * @file
 * VC-1 and WMV3 loopfilter
 */
| 28 | |
| 29 | #include "avcodec.h" |
| 30 | #include "mpegvideo.h" |
| 31 | #include "vc1.h" |
| 32 | #include "vc1dsp.h" |
| 33 | |
| 34 | void ff_vc1_loop_filter_iblk(VC1Context *v, int pq) |
| 35 | { |
| 36 | MpegEncContext *s = &v->s; |
| 37 | int j; |
| 38 | if (!s->first_slice_line) { |
| 39 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); |
| 40 | if (s->mb_x) |
| 41 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
| 42 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); |
| 43 | for (j = 0; j < 2; j++) { |
| 44 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq); |
| 45 | if (s->mb_x) |
| 46 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
| 47 | } |
| 48 | } |
| 49 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq); |
| 50 | |
| 51 | if (s->mb_y == s->end_mb_y - 1) { |
| 52 | if (s->mb_x) { |
| 53 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); |
| 54 | v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); |
| 55 | v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); |
| 56 | } |
| 57 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq) |
| 62 | { |
| 63 | MpegEncContext *s = &v->s; |
| 64 | int j; |
| 65 | |
| 66 | /* The loopfilter runs 1 row and 1 column behind the overlap filter, which |
| 67 | * means it runs two rows/cols behind the decoding loop. */ |
| 68 | if (!s->first_slice_line) { |
| 69 | if (s->mb_x) { |
| 70 | if (s->mb_y >= s->start_mb_y + 2) { |
| 71 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); |
| 72 | |
| 73 | if (s->mb_x >= 2) |
| 74 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq); |
| 75 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq); |
| 76 | for (j = 0; j < 2; j++) { |
| 77 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); |
| 78 | if (s->mb_x >= 2) { |
| 79 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq); |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq); |
| 84 | } |
| 85 | |
| 86 | if (s->mb_x == s->mb_width - 1) { |
| 87 | if (s->mb_y >= s->start_mb_y + 2) { |
| 88 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
| 89 | |
| 90 | if (s->mb_x) |
| 91 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq); |
| 92 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq); |
| 93 | for (j = 0; j < 2; j++) { |
| 94 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
| 95 | if (s->mb_x >= 2) { |
| 96 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq); |
| 101 | } |
| 102 | |
| 103 | if (s->mb_y == s->end_mb_y) { |
| 104 | if (s->mb_x) { |
| 105 | if (s->mb_x >= 2) |
| 106 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); |
| 107 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq); |
| 108 | if (s->mb_x >= 2) { |
| 109 | for (j = 0; j < 2; j++) { |
| 110 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | if (s->mb_x == s->mb_width - 1) { |
| 116 | if (s->mb_x) |
| 117 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
| 118 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); |
| 119 | if (s->mb_x) { |
| 120 | for (j = 0; j < 2; j++) { |
| 121 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | } |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v) |
| 130 | { |
| 131 | MpegEncContext *s = &v->s; |
| 132 | int mb_pos; |
| 133 | |
| 134 | if (v->condover == CONDOVER_NONE) |
| 135 | return; |
| 136 | |
| 137 | mb_pos = s->mb_x + s->mb_y * s->mb_stride; |
| 138 | |
| 139 | /* Within a MB, the horizontal overlap always runs before the vertical. |
| 140 | * To accomplish that, we run the H on left and internal borders of the |
| 141 | * currently decoded MB. Then, we wait for the next overlap iteration |
| 142 | * to do H overlap on the right edge of this MB, before moving over and |
| 143 | * running the V overlap. Therefore, the V overlap makes us trail by one |
| 144 | * MB col and the H overlap filter makes us trail by one MB row. This |
| 145 | * is reflected in the time at which we run the put_pixels loop. */ |
| 146 | if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) { |
| 147 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
| 148 | v->over_flags_plane[mb_pos - 1])) { |
| 149 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1], |
| 150 | v->block[v->cur_blk_idx][0]); |
| 151 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3], |
| 152 | v->block[v->cur_blk_idx][2]); |
| 153 | if (!(s->flags & CODEC_FLAG_GRAY)) { |
| 154 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4], |
| 155 | v->block[v->cur_blk_idx][4]); |
| 156 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5], |
| 157 | v->block[v->cur_blk_idx][5]); |
| 158 | } |
| 159 | } |
| 160 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0], |
| 161 | v->block[v->cur_blk_idx][1]); |
| 162 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2], |
| 163 | v->block[v->cur_blk_idx][3]); |
| 164 | |
| 165 | if (s->mb_x == s->mb_width - 1) { |
| 166 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
| 167 | v->over_flags_plane[mb_pos - s->mb_stride])) { |
| 168 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2], |
| 169 | v->block[v->cur_blk_idx][0]); |
| 170 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3], |
| 171 | v->block[v->cur_blk_idx][1]); |
| 172 | if (!(s->flags & CODEC_FLAG_GRAY)) { |
| 173 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4], |
| 174 | v->block[v->cur_blk_idx][4]); |
| 175 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5], |
| 176 | v->block[v->cur_blk_idx][5]); |
| 177 | } |
| 178 | } |
| 179 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0], |
| 180 | v->block[v->cur_blk_idx][2]); |
| 181 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1], |
| 182 | v->block[v->cur_blk_idx][3]); |
| 183 | } |
| 184 | } |
| 185 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) { |
| 186 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
| 187 | v->over_flags_plane[mb_pos - s->mb_stride - 1])) { |
| 188 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2], |
| 189 | v->block[v->left_blk_idx][0]); |
| 190 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3], |
| 191 | v->block[v->left_blk_idx][1]); |
| 192 | if (!(s->flags & CODEC_FLAG_GRAY)) { |
| 193 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4], |
| 194 | v->block[v->left_blk_idx][4]); |
| 195 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5], |
| 196 | v->block[v->left_blk_idx][5]); |
| 197 | } |
| 198 | } |
| 199 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0], |
| 200 | v->block[v->left_blk_idx][2]); |
| 201 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1], |
| 202 | v->block[v->left_blk_idx][3]); |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num) |
| 207 | { |
| 208 | MpegEncContext *s = &v->s; |
| 209 | int mb_cbp = v->cbp[s->mb_x - s->mb_stride], |
| 210 | block_cbp = mb_cbp >> (block_num * 4), bottom_cbp, |
| 211 | mb_is_intra = v->is_intra[s->mb_x - s->mb_stride], |
| 212 | block_is_intra = mb_is_intra >> block_num, bottom_is_intra; |
| 213 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; |
| 214 | uint8_t *dst; |
| 215 | |
| 216 | if (block_num > 3) { |
| 217 | dst = s->dest[block_num - 3]; |
| 218 | } else { |
| 219 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize; |
| 220 | } |
| 221 | if (s->mb_y != s->end_mb_y || block_num < 2) { |
| 222 | int16_t (*mv)[2]; |
| 223 | int mv_stride; |
| 224 | |
| 225 | if (block_num > 3) { |
| 226 | bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4); |
| 227 | bottom_is_intra = v->is_intra[s->mb_x] >> block_num; |
| 228 | mv = &v->luma_mv[s->mb_x - s->mb_stride]; |
| 229 | mv_stride = s->mb_stride; |
| 230 | } else { |
| 231 | bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4)) |
| 232 | : (v->cbp[s->mb_x] >> ((block_num - 2) * 4)); |
| 233 | bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2)) |
| 234 | : (v->is_intra[s->mb_x] >> (block_num - 2)); |
| 235 | mv_stride = s->b8_stride; |
| 236 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride]; |
| 237 | } |
| 238 | |
| 239 | if (bottom_is_intra & 1 || block_is_intra & 1 || |
| 240 | mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) { |
| 241 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
| 242 | } else { |
| 243 | idx = ((bottom_cbp >> 2) | block_cbp) & 3; |
| 244 | if (idx == 3) { |
| 245 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
| 246 | } else if (idx) { |
| 247 | if (idx == 1) |
| 248 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); |
| 249 | else |
| 250 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | dst -= 4 * linesize; |
| 256 | ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF; |
| 257 | if (ttblk == TT_4X4 || ttblk == TT_8X4) { |
| 258 | idx = (block_cbp | (block_cbp >> 2)) & 3; |
| 259 | if (idx == 3) { |
| 260 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
| 261 | } else if (idx) { |
| 262 | if (idx == 1) |
| 263 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); |
| 264 | else |
| 265 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num) |
| 271 | { |
| 272 | MpegEncContext *s = &v->s; |
| 273 | int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride], |
| 274 | block_cbp = mb_cbp >> (block_num * 4), right_cbp, |
| 275 | mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride], |
| 276 | block_is_intra = mb_is_intra >> block_num, right_is_intra; |
| 277 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; |
| 278 | uint8_t *dst; |
| 279 | |
| 280 | if (block_num > 3) { |
| 281 | dst = s->dest[block_num - 3] - 8 * linesize; |
| 282 | } else { |
| 283 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8; |
| 284 | } |
| 285 | |
| 286 | if (s->mb_x != s->mb_width || !(block_num & 5)) { |
| 287 | int16_t (*mv)[2]; |
| 288 | |
| 289 | if (block_num > 3) { |
| 290 | right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4); |
| 291 | right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num; |
| 292 | mv = &v->luma_mv[s->mb_x - s->mb_stride - 1]; |
| 293 | } else { |
| 294 | right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4)) |
| 295 | : (mb_cbp >> ((block_num + 1) * 4)); |
| 296 | right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1)) |
| 297 | : (mb_is_intra >> (block_num + 1)); |
| 298 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2]; |
| 299 | } |
| 300 | if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) { |
| 301 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
| 302 | } else { |
| 303 | idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check |
| 304 | if (idx == 5) { |
| 305 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
| 306 | } else if (idx) { |
| 307 | if (idx == 1) |
| 308 | v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq); |
| 309 | else |
| 310 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); |
| 311 | } |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | dst -= 4; |
| 316 | ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf; |
| 317 | if (ttblk == TT_4X4 || ttblk == TT_4X8) { |
| 318 | idx = (block_cbp | (block_cbp >> 1)) & 5; |
| 319 | if (idx == 5) { |
| 320 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
| 321 | } else if (idx) { |
| 322 | if (idx == 1) |
| 323 | v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq); |
| 324 | else |
| 325 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); |
| 326 | } |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | void ff_vc1_apply_p_loop_filter(VC1Context *v) |
| 331 | { |
| 332 | MpegEncContext *s = &v->s; |
| 333 | int i; |
| 334 | |
| 335 | for (i = 0; i < 6; i++) { |
| 336 | vc1_apply_p_v_loop_filter(v, i); |
| 337 | } |
| 338 | |
| 339 | /* V always precedes H, therefore we run H one MB before V; |
| 340 | * at the end of a row, we catch up to complete the row */ |
| 341 | if (s->mb_x) { |
| 342 | for (i = 0; i < 6; i++) { |
| 343 | vc1_apply_p_h_loop_filter(v, i); |
| 344 | } |
| 345 | if (s->mb_x == s->mb_width - 1) { |
| 346 | s->mb_x++; |
| 347 | ff_update_block_index(s); |
| 348 | for (i = 0; i < 6; i++) { |
| 349 | vc1_apply_p_h_loop_filter(v, i); |
| 350 | } |
| 351 | } |
| 352 | } |
| 353 | } |