| 1 | /* |
| 2 | * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "libavutil/intmath.h" |
| 22 | #include "libavutil/log.h" |
| 23 | #include "libavutil/opt.h" |
| 24 | #include "avcodec.h" |
| 25 | #include "me_cmp.h" |
| 26 | #include "snow_dwt.h" |
| 27 | #include "internal.h" |
| 28 | #include "snow.h" |
| 29 | #include "snowdata.h" |
| 30 | |
| 31 | #include "rangecoder.h" |
| 32 | #include "mathops.h" |
| 33 | #include "h263.h" |
| 34 | |
| 35 | |
| 36 | void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
| 37 | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
| 38 | int y, x; |
| 39 | IDWTELEM * dst; |
| 40 | for(y=0; y<b_h; y++){ |
| 41 | //FIXME ugly misuse of obmc_stride |
| 42 | const uint8_t *obmc1= obmc + y*obmc_stride; |
| 43 | const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
| 44 | const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
| 45 | const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
| 46 | dst = slice_buffer_get_line(sb, src_y + y); |
| 47 | for(x=0; x<b_w; x++){ |
| 48 | int v= obmc1[x] * block[3][x + y*src_stride] |
| 49 | +obmc2[x] * block[2][x + y*src_stride] |
| 50 | +obmc3[x] * block[1][x + y*src_stride] |
| 51 | +obmc4[x] * block[0][x + y*src_stride]; |
| 52 | |
| 53 | v <<= 8 - LOG2_OBMC_MAX; |
| 54 | if(FRAC_BITS != 8){ |
| 55 | v >>= 8 - FRAC_BITS; |
| 56 | } |
| 57 | if(add){ |
| 58 | v += dst[x + src_x]; |
| 59 | v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
| 60 | if(v&(~255)) v= ~(v>>31); |
| 61 | dst8[x + y*src_stride] = v; |
| 62 | }else{ |
| 63 | dst[x + src_x] -= v; |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | int ff_snow_get_buffer(SnowContext *s, AVFrame *frame) |
| 70 | { |
| 71 | int ret, i; |
| 72 | int edges_needed = av_codec_is_encoder(s->avctx->codec); |
| 73 | |
| 74 | frame->width = s->avctx->width ; |
| 75 | frame->height = s->avctx->height; |
| 76 | if (edges_needed) { |
| 77 | frame->width += 2 * EDGE_WIDTH; |
| 78 | frame->height += 2 * EDGE_WIDTH; |
| 79 | } |
| 80 | if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) |
| 81 | return ret; |
| 82 | if (edges_needed) { |
| 83 | for (i = 0; frame->data[i]; i++) { |
| 84 | int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) * |
| 85 | frame->linesize[i] + |
| 86 | (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0)); |
| 87 | frame->data[i] += offset; |
| 88 | } |
| 89 | frame->width = s->avctx->width; |
| 90 | frame->height = s->avctx->height; |
| 91 | } |
| 92 | |
| 93 | return 0; |
| 94 | } |
| 95 | |
| 96 | void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts |
| 97 | int plane_index, level, orientation; |
| 98 | |
| 99 | for(plane_index=0; plane_index<3; plane_index++){ |
| 100 | for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
| 101 | for(orientation=level ? 1:0; orientation<4; orientation++){ |
| 102 | memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
| 103 | } |
| 104 | } |
| 105 | } |
| 106 | memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
| 107 | memset(s->block_state, MID_STATE, sizeof(s->block_state)); |
| 108 | } |
| 109 | |
| 110 | int ff_snow_alloc_blocks(SnowContext *s){ |
| 111 | int w= FF_CEIL_RSHIFT(s->avctx->width, LOG2_MB_SIZE); |
| 112 | int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE); |
| 113 | |
| 114 | s->b_width = w; |
| 115 | s->b_height= h; |
| 116 | |
| 117 | av_free(s->block); |
| 118 | s->block= av_mallocz_array(w * h, sizeof(BlockNode) << (s->block_max_depth*2)); |
| 119 | if (!s->block) |
| 120 | return AVERROR(ENOMEM); |
| 121 | |
| 122 | return 0; |
| 123 | } |
| 124 | |
| 125 | static av_cold void init_qexp(void){ |
| 126 | int i; |
| 127 | double v=128; |
| 128 | |
| 129 | for(i=0; i<QROOT; i++){ |
| 130 | ff_qexp[i]= lrintf(v); |
| 131 | v *= pow(2, 1.0 / QROOT); |
| 132 | } |
| 133 | } |
| 134 | static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){ |
| 135 | static const uint8_t weight[64]={ |
| 136 | 8,7,6,5,4,3,2,1, |
| 137 | 7,7,0,0,0,0,0,1, |
| 138 | 6,0,6,0,0,0,2,0, |
| 139 | 5,0,0,5,0,3,0,0, |
| 140 | 4,0,0,0,4,0,0,0, |
| 141 | 3,0,0,5,0,3,0,0, |
| 142 | 2,0,6,0,0,0,2,0, |
| 143 | 1,7,0,0,0,0,0,1, |
| 144 | }; |
| 145 | |
| 146 | static const uint8_t brane[256]={ |
| 147 | 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, |
| 148 | 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, |
| 149 | 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, |
| 150 | 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, |
| 151 | 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, |
| 152 | 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, |
| 153 | 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, |
| 154 | 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, |
| 155 | 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, |
| 156 | 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, |
| 157 | 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, |
| 158 | 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, |
| 159 | 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, |
| 160 | 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, |
| 161 | 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, |
| 162 | 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, |
| 163 | }; |
| 164 | |
| 165 | static const uint8_t needs[16]={ |
| 166 | 0,1,0,0, |
| 167 | 2,4,2,0, |
| 168 | 0,1,0,0, |
| 169 | 15 |
| 170 | }; |
| 171 | |
| 172 | int x, y, b, r, l; |
| 173 | int16_t tmpIt [64*(32+HTAPS_MAX)]; |
| 174 | uint8_t tmp2t[3][64*(32+HTAPS_MAX)]; |
| 175 | int16_t *tmpI= tmpIt; |
| 176 | uint8_t *tmp2= tmp2t[0]; |
| 177 | const uint8_t *hpel[11]; |
| 178 | av_assert2(dx<16 && dy<16); |
| 179 | r= brane[dx + 16*dy]&15; |
| 180 | l= brane[dx + 16*dy]>>4; |
| 181 | |
| 182 | b= needs[l] | needs[r]; |
| 183 | if(p && !p->diag_mc) |
| 184 | b= 15; |
| 185 | |
| 186 | if(b&5){ |
| 187 | for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
| 188 | for(x=0; x < b_w; x++){ |
| 189 | int a_1=src[x + HTAPS_MAX/2-4]; |
| 190 | int a0= src[x + HTAPS_MAX/2-3]; |
| 191 | int a1= src[x + HTAPS_MAX/2-2]; |
| 192 | int a2= src[x + HTAPS_MAX/2-1]; |
| 193 | int a3= src[x + HTAPS_MAX/2+0]; |
| 194 | int a4= src[x + HTAPS_MAX/2+1]; |
| 195 | int a5= src[x + HTAPS_MAX/2+2]; |
| 196 | int a6= src[x + HTAPS_MAX/2+3]; |
| 197 | int am=0; |
| 198 | if(!p || p->fast_mc){ |
| 199 | am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
| 200 | tmpI[x]= am; |
| 201 | am= (am+16)>>5; |
| 202 | }else{ |
| 203 | am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
| 204 | tmpI[x]= am; |
| 205 | am= (am+32)>>6; |
| 206 | } |
| 207 | |
| 208 | if(am&(~255)) am= ~(am>>31); |
| 209 | tmp2[x]= am; |
| 210 | } |
| 211 | tmpI+= 64; |
| 212 | tmp2+= 64; |
| 213 | src += stride; |
| 214 | } |
| 215 | src -= stride*y; |
| 216 | } |
| 217 | src += HTAPS_MAX/2 - 1; |
| 218 | tmp2= tmp2t[1]; |
| 219 | |
| 220 | if(b&2){ |
| 221 | for(y=0; y < b_h; y++){ |
| 222 | for(x=0; x < b_w+1; x++){ |
| 223 | int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
| 224 | int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
| 225 | int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
| 226 | int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
| 227 | int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
| 228 | int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
| 229 | int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
| 230 | int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
| 231 | int am=0; |
| 232 | if(!p || p->fast_mc) |
| 233 | am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
| 234 | else |
| 235 | am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
| 236 | |
| 237 | if(am&(~255)) am= ~(am>>31); |
| 238 | tmp2[x]= am; |
| 239 | } |
| 240 | src += stride; |
| 241 | tmp2+= 64; |
| 242 | } |
| 243 | src -= stride*y; |
| 244 | } |
| 245 | src += stride*(HTAPS_MAX/2 - 1); |
| 246 | tmp2= tmp2t[2]; |
| 247 | tmpI= tmpIt; |
| 248 | if(b&4){ |
| 249 | for(y=0; y < b_h; y++){ |
| 250 | for(x=0; x < b_w; x++){ |
| 251 | int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
| 252 | int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
| 253 | int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
| 254 | int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
| 255 | int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
| 256 | int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
| 257 | int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
| 258 | int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
| 259 | int am=0; |
| 260 | if(!p || p->fast_mc) |
| 261 | am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
| 262 | else |
| 263 | am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
| 264 | if(am&(~255)) am= ~(am>>31); |
| 265 | tmp2[x]= am; |
| 266 | } |
| 267 | tmpI+= 64; |
| 268 | tmp2+= 64; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | hpel[ 0]= src; |
| 273 | hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1); |
| 274 | hpel[ 2]= src + 1; |
| 275 | |
| 276 | hpel[ 4]= tmp2t[1]; |
| 277 | hpel[ 5]= tmp2t[2]; |
| 278 | hpel[ 6]= tmp2t[1] + 1; |
| 279 | |
| 280 | hpel[ 8]= src + stride; |
| 281 | hpel[ 9]= hpel[1] + 64; |
| 282 | hpel[10]= hpel[8] + 1; |
| 283 | |
| 284 | #define MC_STRIDE(x) (needs[x] ? 64 : stride) |
| 285 | |
| 286 | if(b==15){ |
| 287 | int dxy = dx / 8 + dy / 8 * 4; |
| 288 | const uint8_t *src1 = hpel[dxy ]; |
| 289 | const uint8_t *src2 = hpel[dxy + 1]; |
| 290 | const uint8_t *src3 = hpel[dxy + 4]; |
| 291 | const uint8_t *src4 = hpel[dxy + 5]; |
| 292 | int stride1 = MC_STRIDE(dxy); |
| 293 | int stride2 = MC_STRIDE(dxy + 1); |
| 294 | int stride3 = MC_STRIDE(dxy + 4); |
| 295 | int stride4 = MC_STRIDE(dxy + 5); |
| 296 | dx&=7; |
| 297 | dy&=7; |
| 298 | for(y=0; y < b_h; y++){ |
| 299 | for(x=0; x < b_w; x++){ |
| 300 | dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ |
| 301 | (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; |
| 302 | } |
| 303 | src1+=stride1; |
| 304 | src2+=stride2; |
| 305 | src3+=stride3; |
| 306 | src4+=stride4; |
| 307 | dst +=stride; |
| 308 | } |
| 309 | }else{ |
| 310 | const uint8_t *src1= hpel[l]; |
| 311 | const uint8_t *src2= hpel[r]; |
| 312 | int stride1 = MC_STRIDE(l); |
| 313 | int stride2 = MC_STRIDE(r); |
| 314 | int a= weight[((dx&7) + (8*(dy&7)))]; |
| 315 | int b= 8-a; |
| 316 | for(y=0; y < b_h; y++){ |
| 317 | for(x=0; x < b_w; x++){ |
| 318 | dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; |
| 319 | } |
| 320 | src1+=stride1; |
| 321 | src2+=stride2; |
| 322 | dst +=stride; |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){ |
| 328 | if(block->type & BLOCK_INTRA){ |
| 329 | int x, y; |
| 330 | const unsigned color = block->color[plane_index]; |
| 331 | const unsigned color4 = color*0x01010101; |
| 332 | if(b_w==32){ |
| 333 | for(y=0; y < b_h; y++){ |
| 334 | *(uint32_t*)&dst[0 + y*stride]= color4; |
| 335 | *(uint32_t*)&dst[4 + y*stride]= color4; |
| 336 | *(uint32_t*)&dst[8 + y*stride]= color4; |
| 337 | *(uint32_t*)&dst[12+ y*stride]= color4; |
| 338 | *(uint32_t*)&dst[16+ y*stride]= color4; |
| 339 | *(uint32_t*)&dst[20+ y*stride]= color4; |
| 340 | *(uint32_t*)&dst[24+ y*stride]= color4; |
| 341 | *(uint32_t*)&dst[28+ y*stride]= color4; |
| 342 | } |
| 343 | }else if(b_w==16){ |
| 344 | for(y=0; y < b_h; y++){ |
| 345 | *(uint32_t*)&dst[0 + y*stride]= color4; |
| 346 | *(uint32_t*)&dst[4 + y*stride]= color4; |
| 347 | *(uint32_t*)&dst[8 + y*stride]= color4; |
| 348 | *(uint32_t*)&dst[12+ y*stride]= color4; |
| 349 | } |
| 350 | }else if(b_w==8){ |
| 351 | for(y=0; y < b_h; y++){ |
| 352 | *(uint32_t*)&dst[0 + y*stride]= color4; |
| 353 | *(uint32_t*)&dst[4 + y*stride]= color4; |
| 354 | } |
| 355 | }else if(b_w==4){ |
| 356 | for(y=0; y < b_h; y++){ |
| 357 | *(uint32_t*)&dst[0 + y*stride]= color4; |
| 358 | } |
| 359 | }else{ |
| 360 | for(y=0; y < b_h; y++){ |
| 361 | for(x=0; x < b_w; x++){ |
| 362 | dst[x + y*stride]= color; |
| 363 | } |
| 364 | } |
| 365 | } |
| 366 | }else{ |
| 367 | uint8_t *src= s->last_picture[block->ref]->data[plane_index]; |
| 368 | const int scale= plane_index ? (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale; |
| 369 | int mx= block->mx*scale; |
| 370 | int my= block->my*scale; |
| 371 | const int dx= mx&15; |
| 372 | const int dy= my&15; |
| 373 | const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
| 374 | sx += (mx>>4) - (HTAPS_MAX/2-1); |
| 375 | sy += (my>>4) - (HTAPS_MAX/2-1); |
| 376 | src += sx + sy*stride; |
| 377 | if( (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0) |
| 378 | || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){ |
| 379 | s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src, |
| 380 | stride, stride, |
| 381 | b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, |
| 382 | sx, sy, w, h); |
| 383 | src= tmp + MB_SIZE; |
| 384 | } |
| 385 | |
| 386 | av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale |
| 387 | |
| 388 | av_assert2((tab_index>=0 && tab_index<4) || b_w==32); |
| 389 | if( (dx&3) || (dy&3) |
| 390 | || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) |
| 391 | || (b_w&(b_w-1)) |
| 392 | || b_w == 1 |
| 393 | || b_h == 1 |
| 394 | || !s->plane[plane_index].fast_mc ) |
| 395 | mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy); |
| 396 | else if(b_w==32){ |
| 397 | int y; |
| 398 | for(y=0; y<b_h; y+=16){ |
| 399 | s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
| 400 | s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
| 401 | } |
| 402 | }else if(b_w==b_h) |
| 403 | s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
| 404 | else if(b_w==2*b_h){ |
| 405 | s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
| 406 | s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
| 407 | }else{ |
| 408 | av_assert2(2*b_w==b_h); |
| 409 | s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
| 410 | s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
| 411 | } |
| 412 | } |
| 413 | } |
| 414 | |
| 415 | #define mca(dx,dy,b_w)\ |
| 416 | static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\ |
| 417 | av_assert2(h==b_w);\ |
| 418 | mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\ |
| 419 | } |
| 420 | |
| 421 | mca( 0, 0,16) |
| 422 | mca( 8, 0,16) |
| 423 | mca( 0, 8,16) |
| 424 | mca( 8, 8,16) |
| 425 | mca( 0, 0,8) |
| 426 | mca( 8, 0,8) |
| 427 | mca( 0, 8,8) |
| 428 | mca( 8, 8,8) |
| 429 | |
| 430 | av_cold int ff_snow_common_init(AVCodecContext *avctx){ |
| 431 | SnowContext *s = avctx->priv_data; |
| 432 | int width, height; |
| 433 | int i, j; |
| 434 | |
| 435 | s->avctx= avctx; |
| 436 | s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe |
| 437 | |
| 438 | ff_me_cmp_init(&s->mecc, avctx); |
| 439 | ff_hpeldsp_init(&s->hdsp, avctx->flags); |
| 440 | ff_videodsp_init(&s->vdsp, 8); |
| 441 | ff_dwt_init(&s->dwt); |
| 442 | ff_h264qpel_init(&s->h264qpel, 8); |
| 443 | |
| 444 | #define mcf(dx,dy)\ |
| 445 | s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\ |
| 446 | s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ |
| 447 | s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
| 448 | s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
| 449 | s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
| 450 | s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
| 451 | |
| 452 | mcf( 0, 0) |
| 453 | mcf( 4, 0) |
| 454 | mcf( 8, 0) |
| 455 | mcf(12, 0) |
| 456 | mcf( 0, 4) |
| 457 | mcf( 4, 4) |
| 458 | mcf( 8, 4) |
| 459 | mcf(12, 4) |
| 460 | mcf( 0, 8) |
| 461 | mcf( 4, 8) |
| 462 | mcf( 8, 8) |
| 463 | mcf(12, 8) |
| 464 | mcf( 0,12) |
| 465 | mcf( 4,12) |
| 466 | mcf( 8,12) |
| 467 | mcf(12,12) |
| 468 | |
| 469 | #define mcfh(dx,dy)\ |
| 470 | s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\ |
| 471 | s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ |
| 472 | mc_block_hpel ## dx ## dy ## 16;\ |
| 473 | s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\ |
| 474 | s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
| 475 | mc_block_hpel ## dx ## dy ## 8; |
| 476 | |
| 477 | mcfh(0, 0) |
| 478 | mcfh(8, 0) |
| 479 | mcfh(0, 8) |
| 480 | mcfh(8, 8) |
| 481 | |
| 482 | init_qexp(); |
| 483 | |
| 484 | // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
| 485 | |
| 486 | width= s->avctx->width; |
| 487 | height= s->avctx->height; |
| 488 | |
| 489 | FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail); |
| 490 | FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer, width, height * sizeof(DWTELEM), fail); //FIXME this does not belong here |
| 491 | FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer, width, sizeof(DWTELEM), fail); |
| 492 | FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer, width, sizeof(IDWTELEM), fail); |
| 493 | FF_ALLOC_ARRAY_OR_GOTO(avctx, s->run_buffer, ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail); |
| 494 | |
| 495 | for(i=0; i<MAX_REF_FRAMES; i++) { |
| 496 | for(j=0; j<MAX_REF_FRAMES; j++) |
| 497 | ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
| 498 | s->last_picture[i] = av_frame_alloc(); |
| 499 | if (!s->last_picture[i]) |
| 500 | goto fail; |
| 501 | } |
| 502 | |
| 503 | s->mconly_picture = av_frame_alloc(); |
| 504 | s->current_picture = av_frame_alloc(); |
| 505 | if (!s->mconly_picture || !s->current_picture) |
| 506 | goto fail; |
| 507 | |
| 508 | return 0; |
| 509 | fail: |
| 510 | return AVERROR(ENOMEM); |
| 511 | } |
| 512 | |
| 513 | int ff_snow_common_init_after_header(AVCodecContext *avctx) { |
| 514 | SnowContext *s = avctx->priv_data; |
| 515 | int plane_index, level, orientation; |
| 516 | int ret, emu_buf_size; |
| 517 | |
| 518 | if(!s->scratchbuf) { |
| 519 | if ((ret = ff_get_buffer(s->avctx, s->mconly_picture, |
| 520 | AV_GET_BUFFER_FLAG_REF)) < 0) |
| 521 | return ret; |
| 522 | FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail); |
| 523 | emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1); |
| 524 | FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail); |
| 525 | } |
| 526 | |
| 527 | if(s->mconly_picture->format != avctx->pix_fmt) { |
| 528 | av_log(avctx, AV_LOG_ERROR, "pixel format changed\n"); |
| 529 | return AVERROR_INVALIDDATA; |
| 530 | } |
| 531 | |
| 532 | for(plane_index=0; plane_index < s->nb_planes; plane_index++){ |
| 533 | int w= s->avctx->width; |
| 534 | int h= s->avctx->height; |
| 535 | |
| 536 | if(plane_index){ |
| 537 | w>>= s->chroma_h_shift; |
| 538 | h>>= s->chroma_v_shift; |
| 539 | } |
| 540 | s->plane[plane_index].width = w; |
| 541 | s->plane[plane_index].height= h; |
| 542 | |
| 543 | for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
| 544 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
| 545 | SubBand *b= &s->plane[plane_index].band[level][orientation]; |
| 546 | |
| 547 | b->buf= s->spatial_dwt_buffer; |
| 548 | b->level= level; |
| 549 | b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); |
| 550 | b->width = (w + !(orientation&1))>>1; |
| 551 | b->height= (h + !(orientation>1))>>1; |
| 552 | |
| 553 | b->stride_line = 1 << (s->spatial_decomposition_count - level); |
| 554 | b->buf_x_offset = 0; |
| 555 | b->buf_y_offset = 0; |
| 556 | |
| 557 | if(orientation&1){ |
| 558 | b->buf += (w+1)>>1; |
| 559 | b->buf_x_offset = (w+1)>>1; |
| 560 | } |
| 561 | if(orientation>1){ |
| 562 | b->buf += b->stride>>1; |
| 563 | b->buf_y_offset = b->stride_line >> 1; |
| 564 | } |
| 565 | b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
| 566 | |
| 567 | if(level) |
| 568 | b->parent= &s->plane[plane_index].band[level-1][orientation]; |
| 569 | //FIXME avoid this realloc |
| 570 | av_freep(&b->x_coeff); |
| 571 | b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff)); |
| 572 | if (!b->x_coeff) |
| 573 | goto fail; |
| 574 | } |
| 575 | w= (w+1)>>1; |
| 576 | h= (h+1)>>1; |
| 577 | } |
| 578 | } |
| 579 | |
| 580 | return 0; |
| 581 | fail: |
| 582 | return AVERROR(ENOMEM); |
| 583 | } |
| 584 | |
| 585 | #define USE_HALFPEL_PLANE 0 |
| 586 | |
| 587 | static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
| 588 | int p,x,y; |
| 589 | |
| 590 | for(p=0; p < s->nb_planes; p++){ |
| 591 | int is_chroma= !!p; |
| 592 | int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width; |
| 593 | int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height; |
| 594 | int ls= frame->linesize[p]; |
| 595 | uint8_t *src= frame->data[p]; |
| 596 | |
| 597 | halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls); |
| 598 | halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls); |
| 599 | halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls); |
| 600 | if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) |
| 601 | return AVERROR(ENOMEM); |
| 602 | |
| 603 | halfpel[0][p]= src; |
| 604 | for(y=0; y<h; y++){ |
| 605 | for(x=0; x<w; x++){ |
| 606 | int i= y*ls + x; |
| 607 | |
| 608 | halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
| 609 | } |
| 610 | } |
| 611 | for(y=0; y<h; y++){ |
| 612 | for(x=0; x<w; x++){ |
| 613 | int i= y*ls + x; |
| 614 | |
| 615 | halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
| 616 | } |
| 617 | } |
| 618 | src= halfpel[1][p]; |
| 619 | for(y=0; y<h; y++){ |
| 620 | for(x=0; x<w; x++){ |
| 621 | int i= y*ls + x; |
| 622 | |
| 623 | halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
| 624 | } |
| 625 | } |
| 626 | |
| 627 | //FIXME border! |
| 628 | } |
| 629 | return 0; |
| 630 | } |
| 631 | |
| 632 | void ff_snow_release_buffer(AVCodecContext *avctx) |
| 633 | { |
| 634 | SnowContext *s = avctx->priv_data; |
| 635 | int i; |
| 636 | |
| 637 | if(s->last_picture[s->max_ref_frames-1]->data[0]){ |
| 638 | av_frame_unref(s->last_picture[s->max_ref_frames-1]); |
| 639 | for(i=0; i<9; i++) |
| 640 | if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) { |
| 641 | av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3])); |
| 642 | s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL; |
| 643 | } |
| 644 | } |
| 645 | } |
| 646 | |
| 647 | int ff_snow_frame_start(SnowContext *s){ |
| 648 | AVFrame *tmp; |
| 649 | int i, ret; |
| 650 | |
| 651 | ff_snow_release_buffer(s->avctx); |
| 652 | |
| 653 | tmp= s->last_picture[s->max_ref_frames-1]; |
| 654 | for(i=s->max_ref_frames-1; i>0; i--) |
| 655 | s->last_picture[i] = s->last_picture[i-1]; |
| 656 | memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
| 657 | if(USE_HALFPEL_PLANE && s->current_picture->data[0]) { |
| 658 | if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0) |
| 659 | return ret; |
| 660 | } |
| 661 | s->last_picture[0] = s->current_picture; |
| 662 | s->current_picture = tmp; |
| 663 | |
| 664 | if(s->keyframe){ |
| 665 | s->ref_frames= 0; |
| 666 | }else{ |
| 667 | int i; |
| 668 | for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++) |
| 669 | if(i && s->last_picture[i-1]->key_frame) |
| 670 | break; |
| 671 | s->ref_frames= i; |
| 672 | if(s->ref_frames==0){ |
| 673 | av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); |
| 674 | return -1; |
| 675 | } |
| 676 | } |
| 677 | if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0) |
| 678 | return ret; |
| 679 | |
| 680 | s->current_picture->key_frame= s->keyframe; |
| 681 | |
| 682 | return 0; |
| 683 | } |
| 684 | |
| 685 | av_cold void ff_snow_common_end(SnowContext *s) |
| 686 | { |
| 687 | int plane_index, level, orientation, i; |
| 688 | |
| 689 | av_freep(&s->spatial_dwt_buffer); |
| 690 | av_freep(&s->temp_dwt_buffer); |
| 691 | av_freep(&s->spatial_idwt_buffer); |
| 692 | av_freep(&s->temp_idwt_buffer); |
| 693 | av_freep(&s->run_buffer); |
| 694 | |
| 695 | s->m.me.temp= NULL; |
| 696 | av_freep(&s->m.me.scratchpad); |
| 697 | av_freep(&s->m.me.map); |
| 698 | av_freep(&s->m.me.score_map); |
| 699 | av_freep(&s->m.obmc_scratchpad); |
| 700 | |
| 701 | av_freep(&s->block); |
| 702 | av_freep(&s->scratchbuf); |
| 703 | av_freep(&s->emu_edge_buffer); |
| 704 | |
| 705 | for(i=0; i<MAX_REF_FRAMES; i++){ |
| 706 | av_freep(&s->ref_mvs[i]); |
| 707 | av_freep(&s->ref_scores[i]); |
| 708 | if(s->last_picture[i] && s->last_picture[i]->data[0]) { |
| 709 | av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]); |
| 710 | } |
| 711 | av_frame_free(&s->last_picture[i]); |
| 712 | } |
| 713 | |
| 714 | for(plane_index=0; plane_index < s->nb_planes; plane_index++){ |
| 715 | for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
| 716 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
| 717 | SubBand *b= &s->plane[plane_index].band[level][orientation]; |
| 718 | |
| 719 | av_freep(&b->x_coeff); |
| 720 | } |
| 721 | } |
| 722 | } |
| 723 | av_frame_free(&s->mconly_picture); |
| 724 | av_frame_free(&s->current_picture); |
| 725 | } |