Imported Debian version 2.5.0~trusty1.1
[deb_ffmpeg.git] / ffmpeg / libavcodec / snow.c
CommitLineData
2ba45a60
DM
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/intmath.h"
22#include "libavutil/log.h"
23#include "libavutil/opt.h"
24#include "avcodec.h"
25#include "me_cmp.h"
26#include "snow_dwt.h"
27#include "internal.h"
28#include "snow.h"
29#include "snowdata.h"
30
31#include "rangecoder.h"
32#include "mathops.h"
33#include "h263.h"
34
35
36void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38 int y, x;
39 IDWTELEM * dst;
40 for(y=0; y<b_h; y++){
41 //FIXME ugly misuse of obmc_stride
42 const uint8_t *obmc1= obmc + y*obmc_stride;
43 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46 dst = slice_buffer_get_line(sb, src_y + y);
47 for(x=0; x<b_w; x++){
48 int v= obmc1[x] * block[3][x + y*src_stride]
49 +obmc2[x] * block[2][x + y*src_stride]
50 +obmc3[x] * block[1][x + y*src_stride]
51 +obmc4[x] * block[0][x + y*src_stride];
52
53 v <<= 8 - LOG2_OBMC_MAX;
54 if(FRAC_BITS != 8){
55 v >>= 8 - FRAC_BITS;
56 }
57 if(add){
58 v += dst[x + src_x];
59 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60 if(v&(~255)) v= ~(v>>31);
61 dst8[x + y*src_stride] = v;
62 }else{
63 dst[x + src_x] -= v;
64 }
65 }
66 }
67}
68
69int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70{
71 int ret, i;
72 int edges_needed = av_codec_is_encoder(s->avctx->codec);
73
74 frame->width = s->avctx->width ;
75 frame->height = s->avctx->height;
76 if (edges_needed) {
77 frame->width += 2 * EDGE_WIDTH;
78 frame->height += 2 * EDGE_WIDTH;
79 }
80 if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
81 return ret;
82 if (edges_needed) {
83 for (i = 0; frame->data[i]; i++) {
84 int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
85 frame->linesize[i] +
86 (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
87 frame->data[i] += offset;
88 }
89 frame->width = s->avctx->width;
90 frame->height = s->avctx->height;
91 }
92
93 return 0;
94}
95
96void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
97 int plane_index, level, orientation;
98
99 for(plane_index=0; plane_index<3; plane_index++){
100 for(level=0; level<MAX_DECOMPOSITIONS; level++){
101 for(orientation=level ? 1:0; orientation<4; orientation++){
102 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
103 }
104 }
105 }
106 memset(s->header_state, MID_STATE, sizeof(s->header_state));
107 memset(s->block_state, MID_STATE, sizeof(s->block_state));
108}
109
110int ff_snow_alloc_blocks(SnowContext *s){
111 int w= FF_CEIL_RSHIFT(s->avctx->width, LOG2_MB_SIZE);
112 int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
113
114 s->b_width = w;
115 s->b_height= h;
116
117 av_free(s->block);
118 s->block= av_mallocz_array(w * h, sizeof(BlockNode) << (s->block_max_depth*2));
119 if (!s->block)
120 return AVERROR(ENOMEM);
121
122 return 0;
123}
124
125static av_cold void init_qexp(void){
126 int i;
127 double v=128;
128
129 for(i=0; i<QROOT; i++){
130 ff_qexp[i]= lrintf(v);
131 v *= pow(2, 1.0 / QROOT);
132 }
133}
134static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
135 static const uint8_t weight[64]={
136 8,7,6,5,4,3,2,1,
137 7,7,0,0,0,0,0,1,
138 6,0,6,0,0,0,2,0,
139 5,0,0,5,0,3,0,0,
140 4,0,0,0,4,0,0,0,
141 3,0,0,5,0,3,0,0,
142 2,0,6,0,0,0,2,0,
143 1,7,0,0,0,0,0,1,
144 };
145
146 static const uint8_t brane[256]={
147 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
148 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
149 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
150 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
151 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
152 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
153 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
154 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
155 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
156 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
157 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
158 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
159 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
160 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
161 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
162 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
163 };
164
165 static const uint8_t needs[16]={
166 0,1,0,0,
167 2,4,2,0,
168 0,1,0,0,
169 15
170 };
171
172 int x, y, b, r, l;
173 int16_t tmpIt [64*(32+HTAPS_MAX)];
174 uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
175 int16_t *tmpI= tmpIt;
176 uint8_t *tmp2= tmp2t[0];
177 const uint8_t *hpel[11];
178 av_assert2(dx<16 && dy<16);
179 r= brane[dx + 16*dy]&15;
180 l= brane[dx + 16*dy]>>4;
181
182 b= needs[l] | needs[r];
183 if(p && !p->diag_mc)
184 b= 15;
185
186 if(b&5){
187 for(y=0; y < b_h+HTAPS_MAX-1; y++){
188 for(x=0; x < b_w; x++){
189 int a_1=src[x + HTAPS_MAX/2-4];
190 int a0= src[x + HTAPS_MAX/2-3];
191 int a1= src[x + HTAPS_MAX/2-2];
192 int a2= src[x + HTAPS_MAX/2-1];
193 int a3= src[x + HTAPS_MAX/2+0];
194 int a4= src[x + HTAPS_MAX/2+1];
195 int a5= src[x + HTAPS_MAX/2+2];
196 int a6= src[x + HTAPS_MAX/2+3];
197 int am=0;
198 if(!p || p->fast_mc){
199 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
200 tmpI[x]= am;
201 am= (am+16)>>5;
202 }else{
203 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
204 tmpI[x]= am;
205 am= (am+32)>>6;
206 }
207
208 if(am&(~255)) am= ~(am>>31);
209 tmp2[x]= am;
210 }
211 tmpI+= 64;
212 tmp2+= 64;
213 src += stride;
214 }
215 src -= stride*y;
216 }
217 src += HTAPS_MAX/2 - 1;
218 tmp2= tmp2t[1];
219
220 if(b&2){
221 for(y=0; y < b_h; y++){
222 for(x=0; x < b_w+1; x++){
223 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
224 int a0= src[x + (HTAPS_MAX/2-3)*stride];
225 int a1= src[x + (HTAPS_MAX/2-2)*stride];
226 int a2= src[x + (HTAPS_MAX/2-1)*stride];
227 int a3= src[x + (HTAPS_MAX/2+0)*stride];
228 int a4= src[x + (HTAPS_MAX/2+1)*stride];
229 int a5= src[x + (HTAPS_MAX/2+2)*stride];
230 int a6= src[x + (HTAPS_MAX/2+3)*stride];
231 int am=0;
232 if(!p || p->fast_mc)
233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
234 else
235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
236
237 if(am&(~255)) am= ~(am>>31);
238 tmp2[x]= am;
239 }
240 src += stride;
241 tmp2+= 64;
242 }
243 src -= stride*y;
244 }
245 src += stride*(HTAPS_MAX/2 - 1);
246 tmp2= tmp2t[2];
247 tmpI= tmpIt;
248 if(b&4){
249 for(y=0; y < b_h; y++){
250 for(x=0; x < b_w; x++){
251 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
252 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
253 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
254 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
255 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
256 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
257 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
258 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
259 int am=0;
260 if(!p || p->fast_mc)
261 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
262 else
263 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
264 if(am&(~255)) am= ~(am>>31);
265 tmp2[x]= am;
266 }
267 tmpI+= 64;
268 tmp2+= 64;
269 }
270 }
271
272 hpel[ 0]= src;
273 hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
274 hpel[ 2]= src + 1;
275
276 hpel[ 4]= tmp2t[1];
277 hpel[ 5]= tmp2t[2];
278 hpel[ 6]= tmp2t[1] + 1;
279
280 hpel[ 8]= src + stride;
281 hpel[ 9]= hpel[1] + 64;
282 hpel[10]= hpel[8] + 1;
283
284#define MC_STRIDE(x) (needs[x] ? 64 : stride)
285
286 if(b==15){
287 int dxy = dx / 8 + dy / 8 * 4;
288 const uint8_t *src1 = hpel[dxy ];
289 const uint8_t *src2 = hpel[dxy + 1];
290 const uint8_t *src3 = hpel[dxy + 4];
291 const uint8_t *src4 = hpel[dxy + 5];
292 int stride1 = MC_STRIDE(dxy);
293 int stride2 = MC_STRIDE(dxy + 1);
294 int stride3 = MC_STRIDE(dxy + 4);
295 int stride4 = MC_STRIDE(dxy + 5);
296 dx&=7;
297 dy&=7;
298 for(y=0; y < b_h; y++){
299 for(x=0; x < b_w; x++){
300 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
301 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
302 }
303 src1+=stride1;
304 src2+=stride2;
305 src3+=stride3;
306 src4+=stride4;
307 dst +=stride;
308 }
309 }else{
310 const uint8_t *src1= hpel[l];
311 const uint8_t *src2= hpel[r];
312 int stride1 = MC_STRIDE(l);
313 int stride2 = MC_STRIDE(r);
314 int a= weight[((dx&7) + (8*(dy&7)))];
315 int b= 8-a;
316 for(y=0; y < b_h; y++){
317 for(x=0; x < b_w; x++){
318 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
319 }
320 src1+=stride1;
321 src2+=stride2;
322 dst +=stride;
323 }
324 }
325}
326
f6fa7814 327void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
2ba45a60
DM
328 if(block->type & BLOCK_INTRA){
329 int x, y;
330 const unsigned color = block->color[plane_index];
331 const unsigned color4 = color*0x01010101;
332 if(b_w==32){
333 for(y=0; y < b_h; y++){
334 *(uint32_t*)&dst[0 + y*stride]= color4;
335 *(uint32_t*)&dst[4 + y*stride]= color4;
336 *(uint32_t*)&dst[8 + y*stride]= color4;
337 *(uint32_t*)&dst[12+ y*stride]= color4;
338 *(uint32_t*)&dst[16+ y*stride]= color4;
339 *(uint32_t*)&dst[20+ y*stride]= color4;
340 *(uint32_t*)&dst[24+ y*stride]= color4;
341 *(uint32_t*)&dst[28+ y*stride]= color4;
342 }
343 }else if(b_w==16){
344 for(y=0; y < b_h; y++){
345 *(uint32_t*)&dst[0 + y*stride]= color4;
346 *(uint32_t*)&dst[4 + y*stride]= color4;
347 *(uint32_t*)&dst[8 + y*stride]= color4;
348 *(uint32_t*)&dst[12+ y*stride]= color4;
349 }
350 }else if(b_w==8){
351 for(y=0; y < b_h; y++){
352 *(uint32_t*)&dst[0 + y*stride]= color4;
353 *(uint32_t*)&dst[4 + y*stride]= color4;
354 }
355 }else if(b_w==4){
356 for(y=0; y < b_h; y++){
357 *(uint32_t*)&dst[0 + y*stride]= color4;
358 }
359 }else{
360 for(y=0; y < b_h; y++){
361 for(x=0; x < b_w; x++){
362 dst[x + y*stride]= color;
363 }
364 }
365 }
366 }else{
367 uint8_t *src= s->last_picture[block->ref]->data[plane_index];
368 const int scale= plane_index ? (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
369 int mx= block->mx*scale;
370 int my= block->my*scale;
371 const int dx= mx&15;
372 const int dy= my&15;
373 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
374 sx += (mx>>4) - (HTAPS_MAX/2-1);
375 sy += (my>>4) - (HTAPS_MAX/2-1);
376 src += sx + sy*stride;
377 if( (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
378 || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
379 s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
380 stride, stride,
381 b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
382 sx, sy, w, h);
383 src= tmp + MB_SIZE;
384 }
385
386 av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
387
388 av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
389 if( (dx&3) || (dy&3)
390 || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
391 || (b_w&(b_w-1))
392 || b_w == 1
393 || b_h == 1
394 || !s->plane[plane_index].fast_mc )
395 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
396 else if(b_w==32){
397 int y;
398 for(y=0; y<b_h; y+=16){
399 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
400 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
401 }
402 }else if(b_w==b_h)
403 s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
404 else if(b_w==2*b_h){
405 s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
406 s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
407 }else{
408 av_assert2(2*b_w==b_h);
409 s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
410 s->h264qpel.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
411 }
412 }
413}
414
415#define mca(dx,dy,b_w)\
416static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
417 av_assert2(h==b_w);\
418 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
419}
420
421mca( 0, 0,16)
422mca( 8, 0,16)
423mca( 0, 8,16)
424mca( 8, 8,16)
425mca( 0, 0,8)
426mca( 8, 0,8)
427mca( 0, 8,8)
428mca( 8, 8,8)
429
430av_cold int ff_snow_common_init(AVCodecContext *avctx){
431 SnowContext *s = avctx->priv_data;
432 int width, height;
433 int i, j;
434
435 s->avctx= avctx;
436 s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437
438 ff_me_cmp_init(&s->mecc, avctx);
439 ff_hpeldsp_init(&s->hdsp, avctx->flags);
440 ff_videodsp_init(&s->vdsp, 8);
441 ff_dwt_init(&s->dwt);
442 ff_h264qpel_init(&s->h264qpel, 8);
443
444#define mcf(dx,dy)\
445 s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\
446 s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
447 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
448 s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\
449 s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
450 s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
451
452 mcf( 0, 0)
453 mcf( 4, 0)
454 mcf( 8, 0)
455 mcf(12, 0)
456 mcf( 0, 4)
457 mcf( 4, 4)
458 mcf( 8, 4)
459 mcf(12, 4)
460 mcf( 0, 8)
461 mcf( 4, 8)
462 mcf( 8, 8)
463 mcf(12, 8)
464 mcf( 0,12)
465 mcf( 4,12)
466 mcf( 8,12)
467 mcf(12,12)
468
469#define mcfh(dx,dy)\
470 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
471 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
472 mc_block_hpel ## dx ## dy ## 16;\
473 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
474 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
475 mc_block_hpel ## dx ## dy ## 8;
476
477 mcfh(0, 0)
478 mcfh(8, 0)
479 mcfh(0, 8)
480 mcfh(8, 8)
481
482 init_qexp();
483
484// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
485
486 width= s->avctx->width;
487 height= s->avctx->height;
488
489 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
490 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer, width, height * sizeof(DWTELEM), fail); //FIXME this does not belong here
491 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer, width, sizeof(DWTELEM), fail);
492 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer, width, sizeof(IDWTELEM), fail);
493 FF_ALLOC_ARRAY_OR_GOTO(avctx, s->run_buffer, ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
494
495 for(i=0; i<MAX_REF_FRAMES; i++) {
496 for(j=0; j<MAX_REF_FRAMES; j++)
497 ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
498 s->last_picture[i] = av_frame_alloc();
499 if (!s->last_picture[i])
500 goto fail;
501 }
502
503 s->mconly_picture = av_frame_alloc();
504 s->current_picture = av_frame_alloc();
505 if (!s->mconly_picture || !s->current_picture)
506 goto fail;
507
508 return 0;
509fail:
510 return AVERROR(ENOMEM);
511}
512
513int ff_snow_common_init_after_header(AVCodecContext *avctx) {
514 SnowContext *s = avctx->priv_data;
515 int plane_index, level, orientation;
516 int ret, emu_buf_size;
517
518 if(!s->scratchbuf) {
519 if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
520 AV_GET_BUFFER_FLAG_REF)) < 0)
521 return ret;
522 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
523 emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
524 FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
525 }
526
527 if(s->mconly_picture->format != avctx->pix_fmt) {
528 av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
529 return AVERROR_INVALIDDATA;
530 }
531
532 for(plane_index=0; plane_index < s->nb_planes; plane_index++){
533 int w= s->avctx->width;
534 int h= s->avctx->height;
535
536 if(plane_index){
537 w>>= s->chroma_h_shift;
538 h>>= s->chroma_v_shift;
539 }
540 s->plane[plane_index].width = w;
541 s->plane[plane_index].height= h;
542
543 for(level=s->spatial_decomposition_count-1; level>=0; level--){
544 for(orientation=level ? 1 : 0; orientation<4; orientation++){
545 SubBand *b= &s->plane[plane_index].band[level][orientation];
546
547 b->buf= s->spatial_dwt_buffer;
548 b->level= level;
549 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
550 b->width = (w + !(orientation&1))>>1;
551 b->height= (h + !(orientation>1))>>1;
552
553 b->stride_line = 1 << (s->spatial_decomposition_count - level);
554 b->buf_x_offset = 0;
555 b->buf_y_offset = 0;
556
557 if(orientation&1){
558 b->buf += (w+1)>>1;
559 b->buf_x_offset = (w+1)>>1;
560 }
561 if(orientation>1){
562 b->buf += b->stride>>1;
563 b->buf_y_offset = b->stride_line >> 1;
564 }
565 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
566
567 if(level)
568 b->parent= &s->plane[plane_index].band[level-1][orientation];
569 //FIXME avoid this realloc
570 av_freep(&b->x_coeff);
571 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
572 if (!b->x_coeff)
573 goto fail;
574 }
575 w= (w+1)>>1;
576 h= (h+1)>>1;
577 }
578 }
579
580 return 0;
581fail:
582 return AVERROR(ENOMEM);
583}
584
585#define USE_HALFPEL_PLANE 0
586
587static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
588 int p,x,y;
589
590 for(p=0; p < s->nb_planes; p++){
591 int is_chroma= !!p;
592 int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
593 int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
594 int ls= frame->linesize[p];
595 uint8_t *src= frame->data[p];
596
597 halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
598 halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
599 halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
600 if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p])
601 return AVERROR(ENOMEM);
602
603 halfpel[0][p]= src;
604 for(y=0; y<h; y++){
605 for(x=0; x<w; x++){
606 int i= y*ls + x;
607
608 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
609 }
610 }
611 for(y=0; y<h; y++){
612 for(x=0; x<w; x++){
613 int i= y*ls + x;
614
615 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
616 }
617 }
618 src= halfpel[1][p];
619 for(y=0; y<h; y++){
620 for(x=0; x<w; x++){
621 int i= y*ls + x;
622
623 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
624 }
625 }
626
627//FIXME border!
628 }
629 return 0;
630}
631
632void ff_snow_release_buffer(AVCodecContext *avctx)
633{
634 SnowContext *s = avctx->priv_data;
635 int i;
636
637 if(s->last_picture[s->max_ref_frames-1]->data[0]){
638 av_frame_unref(s->last_picture[s->max_ref_frames-1]);
639 for(i=0; i<9; i++)
f6fa7814 640 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
2ba45a60 641 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
f6fa7814
DM
642 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
643 }
2ba45a60
DM
644 }
645}
646
647int ff_snow_frame_start(SnowContext *s){
648 AVFrame *tmp;
649 int i, ret;
650
651 ff_snow_release_buffer(s->avctx);
652
653 tmp= s->last_picture[s->max_ref_frames-1];
654 for(i=s->max_ref_frames-1; i>0; i--)
655 s->last_picture[i] = s->last_picture[i-1];
656 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
657 if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
658 if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
659 return ret;
660 }
661 s->last_picture[0] = s->current_picture;
662 s->current_picture = tmp;
663
664 if(s->keyframe){
665 s->ref_frames= 0;
666 }else{
667 int i;
668 for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
669 if(i && s->last_picture[i-1]->key_frame)
670 break;
671 s->ref_frames= i;
672 if(s->ref_frames==0){
673 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
674 return -1;
675 }
676 }
677 if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
678 return ret;
679
680 s->current_picture->key_frame= s->keyframe;
681
682 return 0;
683}
684
685av_cold void ff_snow_common_end(SnowContext *s)
686{
687 int plane_index, level, orientation, i;
688
689 av_freep(&s->spatial_dwt_buffer);
690 av_freep(&s->temp_dwt_buffer);
691 av_freep(&s->spatial_idwt_buffer);
692 av_freep(&s->temp_idwt_buffer);
693 av_freep(&s->run_buffer);
694
695 s->m.me.temp= NULL;
696 av_freep(&s->m.me.scratchpad);
697 av_freep(&s->m.me.map);
698 av_freep(&s->m.me.score_map);
699 av_freep(&s->m.obmc_scratchpad);
700
701 av_freep(&s->block);
702 av_freep(&s->scratchbuf);
703 av_freep(&s->emu_edge_buffer);
704
705 for(i=0; i<MAX_REF_FRAMES; i++){
706 av_freep(&s->ref_mvs[i]);
707 av_freep(&s->ref_scores[i]);
708 if(s->last_picture[i] && s->last_picture[i]->data[0]) {
709 av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
710 }
711 av_frame_free(&s->last_picture[i]);
712 }
713
714 for(plane_index=0; plane_index < s->nb_planes; plane_index++){
715 for(level=s->spatial_decomposition_count-1; level>=0; level--){
716 for(orientation=level ? 1 : 0; orientation<4; orientation++){
717 SubBand *b= &s->plane[plane_index].band[level][orientation];
718
719 av_freep(&b->x_coeff);
720 }
721 }
722 }
723 av_frame_free(&s->mconly_picture);
724 av_frame_free(&s->current_picture);
725}