2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
29 #include "rangecoder.h"
32 #include "mpegvideo.h"
// Encoder init: fills the SnowContext stored in avctx->priv_data from the
// AVCodecContext options, allocates the motion-estimation scratch buffers,
// the input picture and (for ME_ITER) the per-reference mv/score arrays.
// The visible failure paths return AVERROR(ENOMEM).
// NOTE(review): this listing omits some original lines (declarations, braces,
// several returns); the comments here describe only the visible statements.
35 static av_cold
int encode_init(AVCodecContext
*avctx
)
37 SnowContext
*s
= avctx
->priv_data
;
// DWT_97 together with QSCALE and global_quality == 0 is rejected; the log
// message calls this combination lossless mode.
41 if(avctx
->prediction_method
== DWT_97
42 && (avctx
->flags
& CODEC_FLAG_QSCALE
)
43 && avctx
->global_quality
== 0){
44 av_log(avctx
, AV_LOG_ERROR
, "The 9/7 wavelet is incompatible with lossless mode.\n");
48 s
->spatial_decomposition_type
= avctx
->prediction_method
; //FIXME add decorrelator type r transform_type
// mv_scale and block_max_depth follow the QPEL and 4MV codec flags.
50 s
->mv_scale
= (avctx
->flags
& CODEC_FLAG_QPEL
) ? 2 : 4;
51 s
->block_max_depth
= (avctx
->flags
& CODEC_FLAG_4MV
) ? 1 : 0;
// The same motion-compensation filter defaults are set on all three planes.
53 for(plane_index
=0; plane_index
<3; plane_index
++){
54 s
->plane
[plane_index
].diag_mc
= 1;
55 s
->plane
[plane_index
].htaps
= 6;
56 s
->plane
[plane_index
].hcoeff
[0]= 40;
57 s
->plane
[plane_index
].hcoeff
[1]= -10;
58 s
->plane
[plane_index
].hcoeff
[2]= 2;
59 s
->plane
[plane_index
].fast_mc
= 1;
// If the shared init fails, ff_snow_common_end is called on the context.
62 if ((ret
= ff_snow_common_init(avctx
)) < 0) {
63 ff_snow_common_end(avctx
->priv_data
);
66 ff_mpegvideoencdsp_init(&s
->mpvencdsp
, avctx
);
68 ff_snow_alloc_blocks(s
);
73 s
->m
.flags
= avctx
->flags
;
74 s
->m
.bit_rate
= avctx
->bit_rate
;
// Zeroed scratch buffers for motion estimation; all four pointers are checked
// together right after the allocations.
77 s
->m
.me
.scratchpad
= av_mallocz_array((avctx
->width
+64), 2*16*2*sizeof(uint8_t));
78 s
->m
.me
.map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
79 s
->m
.me
.score_map
= av_mallocz(ME_MAP_SIZE
*sizeof(uint32_t));
80 s
->m
.obmc_scratchpad
= av_mallocz(MB_SIZE
*MB_SIZE
*12*sizeof(uint32_t));
81 if (!s
->m
.me
.scratchpad
|| !s
->m
.me
.map
|| !s
->m
.me
.score_map
|| !s
->m
.obmc_scratchpad
)
82 return AVERROR(ENOMEM
);
84 ff_h263_encode_init(&s
->m
); //mv_penalty
// Clamp the requested reference count into [1, MAX_REF_FRAMES].
86 s
->max_ref_frames
= FFMAX(FFMIN(avctx
->refs
, MAX_REF_FRAMES
), 1);
// First pass: a 256-byte zeroed stats_out buffer is allocated.
88 if(avctx
->flags
&CODEC_FLAG_PASS1
){
90 avctx
->stats_out
= av_mallocz(256);
92 if (!avctx
->stats_out
)
93 return AVERROR(ENOMEM
);
// Rate control is initialised for pass 2, or whenever QSCALE is not set.
95 if((avctx
->flags
&CODEC_FLAG_PASS2
) || !(avctx
->flags
&CODEC_FLAG_QSCALE
)){
96 if(ff_rate_control_init(&s
->m
) < 0)
// pass1_rc is set only when neither QSCALE nor PASS2 is requested.
99 s
->pass1_rc
= !(avctx
->flags
& (CODEC_FLAG_QSCALE
|CODEC_FLAG_PASS2
));
// Pixel format selects colorspace_type: 0 follows the YUV cases, 1 follows GRAY8.
101 switch(avctx
->pix_fmt
){
102 case AV_PIX_FMT_YUV444P
:
103 // case AV_PIX_FMT_YUV422P:
104 case AV_PIX_FMT_YUV420P
:
105 // case AV_PIX_FMT_YUV411P:
106 case AV_PIX_FMT_YUV410P
:
108 s
->colorspace_type
= 0;
110 case AV_PIX_FMT_GRAY8
:
112 s
->colorspace_type
= 1;
114 /* case AV_PIX_FMT_RGB32:
118 av_log(avctx
, AV_LOG_ERROR
, "pixel format not supported\n");
121 avcodec_get_chroma_sub_sample(avctx
->pix_fmt
, &s
->chroma_h_shift
, &s
->chroma_v_shift
);
// Install the comparison functions chosen by avctx->me_cmp and me_sub_cmp.
123 ff_set_cmp(&s
->mecc
, s
->mecc
.me_cmp
, s
->avctx
->me_cmp
);
124 ff_set_cmp(&s
->mecc
, s
->mecc
.me_sub_cmp
, s
->avctx
->me_sub_cmp
);
126 s
->input_picture
= av_frame_alloc();
127 if (!s
->input_picture
)
128 return AVERROR(ENOMEM
);
130 if ((ret
= ff_snow_get_buffer(s
, s
->input_picture
)) < 0)
// ME_ITER needs, per reference frame, one mv pair and one score per block;
// size is b_width * b_height scaled by 2*block_max_depth bits.
133 if(s
->avctx
->me_method
== ME_ITER
){
134 int size
= s
->b_width
* s
->b_height
<< 2*s
->block_max_depth
;
135 for(i
=0; i
<s
->max_ref_frames
; i
++){
136 s
->ref_mvs
[i
]= av_mallocz_array(size
, sizeof(int16_t[2]));
137 s
->ref_scores
[i
]= av_mallocz_array(size
, sizeof(uint32_t));
138 if (!s
->ref_mvs
[i
] || !s
->ref_scores
[i
])
139 return AVERROR(ENOMEM
);
146 //near copy & paste from dsputil, FIXME
// Walks an h-row by w-column block of bytes starting at pix; after each row
// the pointer is advanced by line_size - w.
// NOTE(review): the inner loop body and the return statement are missing from
// this listing; the name suggests it returns the sum of the samples - confirm
// against the full source.
147 static int pix_sum(uint8_t * pix
, int line_size
, int w
, int h
)
152 for (i
= 0; i
< h
; i
++) {
153 for (j
= 0; j
< w
; j
++) {
157 pix
+= line_size
- w
;
162 //near copy & paste from dsputil, FIXME
// Walks a w-by-w block of bytes starting at pix (both loops run to w); after
// each row the pointer is advanced by line_size - w. sq points 256 entries
// into ff_square_tab.
// NOTE(review): the inner loop body and the return statement are missing from
// this listing; presumably it accumulates squared samples through sq - confirm
// against the full source.
163 static int pix_norm1(uint8_t * pix
, int line_size
, int w
)
166 uint32_t *sq
= ff_square_tab
+ 256;
169 for (i
= 0; i
< w
; i
++) {
170 for (j
= 0; j
< w
; j
++) {
174 pix
+= line_size
- w
;
// Returns a penalty factor derived from lambda (scaled by 1, 3/2, 4 or 2) or
// from lambda2, shifted down by FF_LAMBDA_SHIFT.
// NOTE(review): the switch on type and its case labels are missing from this
// listing, so which comparison type selects which return is not visible here.
179 static inline int get_penalty_factor(int lambda
, int lambda2
, int type
){
183 return lambda
>>FF_LAMBDA_SHIFT
;
185 return (3*lambda
)>>(FF_LAMBDA_SHIFT
+1);
187 return (4*lambda
)>>(FF_LAMBDA_SHIFT
);
189 return (2*lambda
)>>(FF_LAMBDA_SHIFT
);
192 return (2*lambda
)>>FF_LAMBDA_SHIFT
;
197 return lambda2
>>FF_LAMBDA_SHIFT
;
// Named slots of the predictor array P that encode_q_branch fills and passes
// to ff_epzs_motion_search: slot 3 holds the top-right neighbour's mv, slot 4
// the median prediction.
// NOTE(review): P_LEFT and P_TOP are used by encode_q_branch but their
// definitions are not visible in this listing.
206 #define P_TOPRIGHT P[3]
207 #define P_MEDIAN P[4]
209 #define FLAG_QPEL 1 //must be 1
// Mode decision for the block at (x, y) on quadtree level `level`.
// Visible steps: motion search over every reference frame, a trial encoding
// of an inter candidate (pc / p_buffer / p_state) and of an intra candidate
// (ic / i_buffer / i_state), an optional recursion into the four child blocks,
// and finally copying one candidate's bytes and context state back into the
// real range coder s->c.
// NOTE(review): this listing omits lines (several declarations, braces, the
// else branches and the return statements), so the exact selection logic and
// the returned score are not fully visible here.
211 static int encode_q_branch(SnowContext
*s
, int level
, int x
, int y
){
// Trial bitstreams and context-state copies for the two candidates.
212 uint8_t p_buffer
[1024];
213 uint8_t i_buffer
[1024];
214 uint8_t p_state
[sizeof(s
->block_state
)];
215 uint8_t i_state
[sizeof(s
->block_state
)];
// Position of the real coder, kept so a candidate can be copied back later.
217 uint8_t *pbbak
= s
->c
.bytestream
;
218 uint8_t *pbbak_start
= s
->c
.bytestream_start
;
219 int score
, score2
, iscore
, i_len
, p_len
, block_s
, sum
, base_bits
;
220 const int w
= s
->b_width
<< s
->block_max_depth
;
221 const int h
= s
->b_height
<< s
->block_max_depth
;
222 const int rem_depth
= s
->block_max_depth
- level
;
223 const int index
= (x
+ y
*w
) << rem_depth
;
224 const int block_w
= 1<<(LOG2_MB_SIZE
- level
);
225 int trx
= (x
+1)<<rem_depth
;
226 int try= (y
+1)<<rem_depth
;
// Neighbouring blocks; positions outside the grid fall back to null_block
// (tl falls back to left, tr falls back to tl).
227 const BlockNode
*left
= x
? &s
->block
[index
-1] : &null_block
;
228 const BlockNode
*top
= y
? &s
->block
[index
-w
] : &null_block
;
229 const BlockNode
*right
= trx
<w
? &s
->block
[index
+1] : &null_block
;
230 const BlockNode
*bottom
= try<h
? &s
->block
[index
+w
] : &null_block
;
231 const BlockNode
*tl
= y
&& x
? &s
->block
[index
-w
-1] : left
;
232 const BlockNode
*tr
= y
&& trx
<w
&& ((x
&1)==0 || level
==0) ? &s
->block
[index
-w
+(1<<rem_depth
)] : tl
; //FIXME use lt
// Colour predictors are taken from the left neighbour.
233 int pl
= left
->color
[0];
234 int pcb
= left
->color
[1];
235 int pcr
= left
->color
[2];
239 const int stride
= s
->current_picture
->linesize
[0];
240 const int uvstride
= s
->current_picture
->linesize
[1];
// Pointers to this block inside the three planes of the input picture; the
// chroma offsets are shifted by the chroma subsampling shifts.
241 uint8_t *current_data
[3]= { s
->input_picture
->data
[0] + (x
+ y
* stride
)*block_w
,
242 s
->input_picture
->data
[1] + ((x
*block_w
)>>s
->chroma_h_shift
) + ((y
*uvstride
*block_w
)>>s
->chroma_v_shift
),
243 s
->input_picture
->data
[2] + ((x
*block_w
)>>s
->chroma_h_shift
) + ((y
*uvstride
*block_w
)>>s
->chroma_v_shift
)};
245 int16_t last_mv
[3][2];
246 int qpel
= !!(s
->avctx
->flags
& CODEC_FLAG_QPEL
); //unused
247 const int shift
= 1+qpel
;
248 MotionEstContext
*c
= &s
->m
.me
;
// Context indices derived from the neighbours' ref, mv difference and level.
249 int ref_context
= av_log2(2*left
->ref
) + av_log2(2*top
->ref
);
250 int mx_context
= av_log2(2*FFABS(left
->mx
- top
->mx
));
251 int my_context
= av_log2(2*FFABS(left
->my
- top
->my
));
252 int s_context
= 2*left
->level
+ 2*top
->level
+ tl
->level
+ tr
->level
;
253 int ref
, best_ref
, ref_score
, ref_mx
, ref_my
;
255 av_assert0(sizeof(s
->block_state
) >= 256);
257 set_blocks(s
, level
, x
, y
, pl
, pcb
, pcr
, 0, 0, 0, BLOCK_INTRA
);
261 // clip predictors / edge ?
267 P_TOPRIGHT
[0]= tr
->mx
;
268 P_TOPRIGHT
[1]= tr
->my
;
// last_mv candidates: the block's current mv, then the right and bottom
// neighbours' mvs.
270 last_mv
[0][0]= s
->block
[index
].mx
;
271 last_mv
[0][1]= s
->block
[index
].my
;
272 last_mv
[1][0]= right
->mx
;
273 last_mv
[1][1]= right
->my
;
274 last_mv
[2][0]= bottom
->mx
;
275 last_mv
[2][1]= bottom
->my
;
282 av_assert1(c
-> stride
== stride
);
283 av_assert1(c
->uvstride
== uvstride
);
285 c
->penalty_factor
= get_penalty_factor(s
->lambda
, s
->lambda2
, c
->avctx
->me_cmp
);
286 c
->sub_penalty_factor
= get_penalty_factor(s
->lambda
, s
->lambda2
, c
->avctx
->me_sub_cmp
);
287 c
->mb_penalty_factor
= get_penalty_factor(s
->lambda
, s
->lambda2
, c
->avctx
->mb_cmp
);
// Note the assignment inside the index: f_code is set to 1 here.
288 c
->current_mv_penalty
= c
->mv_penalty
[s
->m
.f_code
=1] + MAX_MV
;
// Motion search window, expressed relative to this block's position.
290 c
->xmin
= - x
*block_w
- 16+3;
291 c
->ymin
= - y
*block_w
- 16+3;
292 c
->xmax
= - (x
+1)*block_w
+ (w
<<(LOG2_MB_SIZE
- s
->block_max_depth
)) + 16-3;
293 c
->ymax
= - (y
+1)*block_w
+ (h
<<(LOG2_MB_SIZE
- s
->block_max_depth
)) + 16-3;
// Clamp the neighbour predictors to the window limits scaled by shift.
295 if(P_LEFT
[0] > (c
->xmax
<<shift
)) P_LEFT
[0] = (c
->xmax
<<shift
);
296 if(P_LEFT
[1] > (c
->ymax
<<shift
)) P_LEFT
[1] = (c
->ymax
<<shift
);
297 if(P_TOP
[0] > (c
->xmax
<<shift
)) P_TOP
[0] = (c
->xmax
<<shift
);
298 if(P_TOP
[1] > (c
->ymax
<<shift
)) P_TOP
[1] = (c
->ymax
<<shift
);
299 if(P_TOPRIGHT
[0] < (c
->xmin
<<shift
)) P_TOPRIGHT
[0]= (c
->xmin
<<shift
);
300 if(P_TOPRIGHT
[0] > (c
->xmax
<<shift
)) P_TOPRIGHT
[0]= (c
->xmax
<<shift
); //due to pmx no clip
301 if(P_TOPRIGHT
[1] > (c
->ymax
<<shift
)) P_TOPRIGHT
[1]= (c
->ymax
<<shift
);
303 P_MEDIAN
[0]= mid_pred(P_LEFT
[0], P_TOP
[0], P_TOPRIGHT
[0]);
304 P_MEDIAN
[1]= mid_pred(P_LEFT
[1], P_TOP
[1], P_TOPRIGHT
[1]);
// pred_x / pred_y are taken from either P_LEFT or P_MEDIAN.
// NOTE(review): the condition choosing between them is not visible here.
307 c
->pred_x
= P_LEFT
[0];
308 c
->pred_y
= P_LEFT
[1];
310 c
->pred_x
= P_MEDIAN
[0];
311 c
->pred_y
= P_MEDIAN
[1];
// Search each reference frame; the per-reference mv and score are stored in
// s->ref_mvs / s->ref_scores at this block's index.
316 for(ref
=0; ref
<s
->ref_frames
; ref
++){
317 init_ref(c
, current_data
, s
->last_picture
[ref
]->data
, NULL
, block_w
*x
, block_w
*y
, 0);
319 ref_score
= ff_epzs_motion_search(&s
->m
, &ref_mx
, &ref_my
, P
, 0, /*ref_index*/ 0, last_mv
,
320 (1<<16)>>shift
, level
-LOG2_MB_SIZE
+4, block_w
);
322 av_assert2(ref_mx
>= c
->xmin
);
323 av_assert2(ref_mx
<= c
->xmax
);
324 av_assert2(ref_my
>= c
->ymin
);
325 av_assert2(ref_my
<= c
->ymax
);
327 ref_score
= c
->sub_motion_search(&s
->m
, &ref_mx
, &ref_my
, ref_score
, 0, 0, level
-LOG2_MB_SIZE
+4, block_w
);
328 ref_score
= ff_get_mb_score(&s
->m
, ref_mx
, ref_my
, 0, 0, level
-LOG2_MB_SIZE
+4, block_w
, 0);
// Higher reference indices get an extra cost of 2*log2(2*ref) penalty units.
329 ref_score
+= 2*av_log2(2*ref
)*c
->penalty_factor
;
331 s
->ref_mvs
[ref
][index
][0]= ref_mx
;
332 s
->ref_mvs
[ref
][index
][1]= ref_my
;
333 s
->ref_scores
[ref
][index
]= ref_score
;
335 if(score
> ref_score
){
342 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
// Bit count of the real coder relative to its current byte position; used as
// the baseline when costing both candidates.
345 base_bits
= get_rac_count(&s
->c
) - 8*(s
->c
.bytestream
- s
->c
.bytestream_start
);
// Inter candidate: written into p_buffer using a private copy of the state.
348 pc
.bytestream
= p_buffer
; //FIXME end/start? and at the others too
349 memcpy(p_state
, s
->block_state
, sizeof(s
->block_state
));
351 if(level
!=s
->block_max_depth
)
352 put_rac(&pc
, &p_state
[4 + s_context
], 1);
353 put_rac(&pc
, &p_state
[1 + left
->type
+ top
->type
], 0);
354 if(s
->ref_frames
> 1)
355 put_symbol(&pc
, &p_state
[128 + 1024 + 32*ref_context
], best_ref
, 0);
356 pred_mv(s
, &pmx
, &pmy
, best_ref
, left
, top
, tr
);
357 put_symbol(&pc
, &p_state
[128 + 32*(mx_context
+ 16*!!best_ref
)], mx
- pmx
, 1);
358 put_symbol(&pc
, &p_state
[128 + 32*(my_context
+ 16*!!best_ref
)], my
- pmy
, 1);
359 p_len
= pc
.bytestream
- pc
.bytestream_start
;
360 score
+= (s
->lambda2
*(get_rac_count(&pc
)-base_bits
))>>FF_LAMBDA_SHIFT
;
// Intra candidate: l is the rounded mean of the luma block, and iscore is
// computed from pix_norm1, l and sum.
362 block_s
= block_w
*block_w
;
363 sum
= pix_sum(current_data
[0], stride
, block_w
, block_w
);
364 l
= (sum
+ block_s
/2)/block_s
;
365 iscore
= pix_norm1(current_data
[0], stride
, block_w
) - 2*l
*sum
+ l
*l
*block_s
;
// With more than two planes, cb and cr are the rounded chroma means.
367 if (s
->nb_planes
> 2) {
368 block_s
= block_w
*block_w
>>(s
->chroma_h_shift
+ s
->chroma_v_shift
);
369 sum
= pix_sum(current_data
[1], uvstride
, block_w
>>s
->chroma_h_shift
, block_w
>>s
->chroma_v_shift
);
370 cb
= (sum
+ block_s
/2)/block_s
;
371 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
372 sum
= pix_sum(current_data
[2], uvstride
, block_w
>>s
->chroma_h_shift
, block_w
>>s
->chroma_v_shift
);
373 cr
= (sum
+ block_s
/2)/block_s
;
374 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// Intra candidate bitstream: written into i_buffer using its own state copy.
380 ic
.bytestream
= i_buffer
; //FIXME end/start? and at the others too
381 memcpy(i_state
, s
->block_state
, sizeof(s
->block_state
));
382 if(level
!=s
->block_max_depth
)
383 put_rac(&ic
, &i_state
[4 + s_context
], 1);
384 put_rac(&ic
, &i_state
[1 + left
->type
+ top
->type
], 1);
385 put_symbol(&ic
, &i_state
[32], l
-pl
, 1);
386 if (s
->nb_planes
> 2) {
387 put_symbol(&ic
, &i_state
[64], cb
-pcb
, 1);
388 put_symbol(&ic
, &i_state
[96], cr
-pcr
, 1);
390 i_len
= ic
.bytestream
- ic
.bytestream_start
;
391 iscore
+= (s
->lambda2
*(get_rac_count(&ic
)-base_bits
))>>FF_LAMBDA_SHIFT
;
393 av_assert1(iscore
< 255*255*256 + s
->lambda2
*10);
394 av_assert1(iscore
>= 0);
395 av_assert1(l
>=0 && l
<=255);
396 av_assert1(pl
>=0 && pl
<=255);
// Scene-change statistic: updated from the intra (varc) and inter (vard)
// scores.
// NOTE(review): the enclosing condition (original lines 397-398) is missing.
399 int varc
= iscore
>> 8;
400 int vard
= score
>> 8;
401 if (vard
<= 64 || vard
< varc
)
402 c
->scene_change_score
+= ff_sqrt(vard
) - ff_sqrt(varc
);
404 c
->scene_change_score
+= s
->m
.qscale
;
// Below the maximum depth: write a 0 split flag to the real coder and sum the
// scores of the four children into score2.
407 if(level
!=s
->block_max_depth
){
408 put_rac(&s
->c
, &s
->block_state
[4 + s_context
], 0);
409 score2
= encode_q_branch(s
, level
+1, 2*x
+0, 2*y
+0);
410 score2
+= encode_q_branch(s
, level
+1, 2*x
+1, 2*y
+0);
411 score2
+= encode_q_branch(s
, level
+1, 2*x
+0, 2*y
+1);
412 score2
+= encode_q_branch(s
, level
+1, 2*x
+1, 2*y
+1);
413 score2
+= s
->lambda2
>>FF_LAMBDA_SHIFT
; //FIXME exact split overhead
415 if(score2
< score
&& score2
< iscore
)
// Commit the intra candidate: copy its bytes to the saved coder position,
// store the block as BLOCK_INTRA and adopt i_state.
420 pred_mv(s
, &pmx
, &pmy
, 0, left
, top
, tr
);
421 memcpy(pbbak
, i_buffer
, i_len
);
423 s
->c
.bytestream_start
= pbbak_start
;
424 s
->c
.bytestream
= pbbak
+ i_len
;
425 set_blocks(s
, level
, x
, y
, l
, cb
, cr
, pmx
, pmy
, 0, BLOCK_INTRA
);
426 memcpy(s
->block_state
, i_state
, sizeof(s
->block_state
));
// Commit the inter candidate: copy its bytes to the saved coder position,
// store the block with mx, my, best_ref and adopt p_state.
429 memcpy(pbbak
, p_buffer
, p_len
);
431 s
->c
.bytestream_start
= pbbak_start
;
432 s
->c
.bytestream
= pbbak
+ p_len
;
433 set_blocks(s
, level
, x
, y
, pl
, pcb
, pcr
, mx
, my
, best_ref
, 0);
434 memcpy(s
->block_state
, p_state
, sizeof(s
->block_state
));
// Writes the block at (x, y, level) from s->block to the range coder s->c
// without any search: a split flag below the maximum depth (recursing into
// the four children when it is 0), then either the intra colours or the
// reference index and motion vector, each coded relative to a prediction.
// NOTE(review): this listing omits braces, else branches and some
// declarations (e.g. pmx, pmy); comments describe only visible statements.
439 static void encode_q_branch2(SnowContext
*s
, int level
, int x
, int y
){
440 const int w
= s
->b_width
<< s
->block_max_depth
;
441 const int rem_depth
= s
->block_max_depth
- level
;
442 const int index
= (x
+ y
*w
) << rem_depth
;
443 int trx
= (x
+1)<<rem_depth
;
444 BlockNode
*b
= &s
->block
[index
];
// Neighbours; positions outside the grid fall back to null_block / left / tl.
445 const BlockNode
*left
= x
? &s
->block
[index
-1] : &null_block
;
446 const BlockNode
*top
= y
? &s
->block
[index
-w
] : &null_block
;
447 const BlockNode
*tl
= y
&& x
? &s
->block
[index
-w
-1] : left
;
448 const BlockNode
*tr
= y
&& trx
<w
&& ((x
&1)==0 || level
==0) ? &s
->block
[index
-w
+(1<<rem_depth
)] : tl
; //FIXME use lt
449 int pl
= left
->color
[0];
450 int pcb
= left
->color
[1];
451 int pcr
= left
->color
[2];
// Context indices; the mv contexts are offset by 16 when b->ref is nonzero.
453 int ref_context
= av_log2(2*left
->ref
) + av_log2(2*top
->ref
);
454 int mx_context
= av_log2(2*FFABS(left
->mx
- top
->mx
)) + 16*!!b
->ref
;
455 int my_context
= av_log2(2*FFABS(left
->my
- top
->my
)) + 16*!!b
->ref
;
456 int s_context
= 2*left
->level
+ 2*top
->level
+ tl
->level
+ tr
->level
;
459 set_blocks(s
, level
, x
, y
, pl
, pcb
, pcr
, 0, 0, 0, BLOCK_INTRA
);
// Split flag: 1 when the block equals its right, lower and lower-right
// neighbours (same_block); otherwise 0 followed by the four children.
463 if(level
!=s
->block_max_depth
){
464 if(same_block(b
,b
+1) && same_block(b
,b
+w
) && same_block(b
,b
+w
+1)){
465 put_rac(&s
->c
, &s
->block_state
[4 + s_context
], 1);
467 put_rac(&s
->c
, &s
->block_state
[4 + s_context
], 0);
468 encode_q_branch2(s
, level
+1, 2*x
+0, 2*y
+0);
469 encode_q_branch2(s
, level
+1, 2*x
+1, 2*y
+0);
470 encode_q_branch2(s
, level
+1, 2*x
+0, 2*y
+1);
471 encode_q_branch2(s
, level
+1, 2*x
+1, 2*y
+1);
// Intra block: type bit 1, then colour deltas against the left neighbour.
475 if(b
->type
& BLOCK_INTRA
){
476 pred_mv(s
, &pmx
, &pmy
, 0, left
, top
, tr
);
477 put_rac(&s
->c
, &s
->block_state
[1 + (left
->type
&1) + (top
->type
&1)], 1);
478 put_symbol(&s
->c
, &s
->block_state
[32], b
->color
[0]-pl
, 1);
479 if (s
->nb_planes
> 2) {
480 put_symbol(&s
->c
, &s
->block_state
[64], b
->color
[1]-pcb
, 1);
481 put_symbol(&s
->c
, &s
->block_state
[96], b
->color
[2]-pcr
, 1);
483 set_blocks(s
, level
, x
, y
, b
->color
[0], b
->color
[1], b
->color
[2], pmx
, pmy
, 0, BLOCK_INTRA
);
// Inter block: type bit 0, the reference index when more than one reference
// frame is in use, then mv deltas against the pred_mv prediction.
485 pred_mv(s
, &pmx
, &pmy
, b
->ref
, left
, top
, tr
);
486 put_rac(&s
->c
, &s
->block_state
[1 + (left
->type
&1) + (top
->type
&1)], 0);
487 if(s
->ref_frames
> 1)
488 put_symbol(&s
->c
, &s
->block_state
[128 + 1024 + 32*ref_context
], b
->ref
, 0);
489 put_symbol(&s
->c
, &s
->block_state
[128 + 32*mx_context
], b
->mx
- pmx
, 1);
490 put_symbol(&s
->c
, &s
->block_state
[128 + 32*my_context
], b
->my
- pmy
, 1);
491 set_blocks(s
, level
, x
, y
, pl
, pcb
, pcr
, b
->mx
, b
->my
, b
->ref
, 0);
// Computes a colour value for block (mb_x, mb_y) of plane plane_index.
// The block is temporarily marked intra with colour 0, add_yblock is run for
// the surrounding blocks into the obmc scratchpad, and two sums are
// accumulated per pixel: ab (source minus rounded prediction, weighted by the
// OBMC weight) and aa (squared OBMC weight). The result is
// ROUNDED_DIV(ab << LOG2_OBMC_MAX, aa) clipped to 0..255.
// NOTE(review): declarations of i, x2, y2, d, ab, aa, the loop over i and the
// restore from backup are missing from this listing.
495 static int get_dc(SnowContext
*s
, int mb_x
, int mb_y
, int plane_index
){
497 Plane
*p
= &s
->plane
[plane_index
];
498 const int block_size
= MB_SIZE
>> s
->block_max_depth
;
// Chroma planes (plane_index != 0) use block sizes reduced by the chroma shifts.
499 const int block_w
= plane_index
? block_size
>>s
->chroma_h_shift
: block_size
;
500 const int block_h
= plane_index
? block_size
>>s
->chroma_v_shift
: block_size
;
501 const uint8_t *obmc
= plane_index
? ff_obmc_tab
[s
->block_max_depth
+s
->chroma_h_shift
] : ff_obmc_tab
[s
->block_max_depth
];
502 const int obmc_stride
= plane_index
? (2*block_size
)>>s
->chroma_h_shift
: 2*block_size
;
503 const int ref_stride
= s
->current_picture
->linesize
[plane_index
];
504 uint8_t *src
= s
-> input_picture
->data
[plane_index
];
505 IDWTELEM
*dst
= (IDWTELEM
*)s
->m
.obmc_scratchpad
+ plane_index
*block_size
*block_size
*4; //FIXME change to unsigned
506 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
507 const int w
= p
->width
;
508 const int h
= p
->height
;
509 int index
= mb_x
+ mb_y
*b_stride
;
510 BlockNode
*b
= &s
->block
[index
];
// Copy of the block taken before it is modified below.
511 BlockNode backup
= *b
;
515 av_assert2(s
->chroma_h_shift
== s
->chroma_v_shift
); //obmc stuff above
// Mark the block intra with colour 0 for this plane and clear the scratch area.
517 b
->type
|= BLOCK_INTRA
;
518 b
->color
[plane_index
]= 0;
519 memset(dst
, 0, obmc_stride
*obmc_stride
*sizeof(IDWTELEM
));
// Block offsets of -1 or 0 in each direction, selected by bits 0 and 1 of i.
522 int mb_x2
= mb_x
+ (i
&1) - 1;
523 int mb_y2
= mb_y
+ (i
>>1) - 1;
524 int x
= block_w
*mb_x2
+ block_w
/2;
525 int y
= block_h
*mb_y2
+ block_h
/2;
527 add_yblock(s
, 0, NULL
, dst
+ (i
&1)*block_w
+ (i
>>1)*obmc_stride
*block_h
, NULL
, obmc
,
528 x
, y
, block_w
, block_h
, w
, h
, obmc_stride
, ref_stride
, obmc_stride
, mb_x2
, mb_y2
, 0, 0, plane_index
);
// Only pixels inside the plane are visited.
530 for(y2
= FFMAX(y
, 0); y2
<FFMIN(h
, y
+block_h
); y2
++){
531 for(x2
= FFMAX(x
, 0); x2
<FFMIN(w
, x
+block_w
); x2
++){
532 int index
= x2
-(block_w
*mb_x
- block_w
/2) + (y2
-(block_h
*mb_y
- block_h
/2))*obmc_stride
;
533 int obmc_v
= obmc
[index
];
// Where the block sticks out of the plane, the weight one block further in
// the obmc table is added on.
535 if(y
<0) obmc_v
+= obmc
[index
+ block_h
*obmc_stride
];
536 if(x
<0) obmc_v
+= obmc
[index
+ block_w
];
537 if(y
+block_h
>h
) obmc_v
+= obmc
[index
- block_h
*obmc_stride
];
538 if(x
+block_w
>w
) obmc_v
+= obmc
[index
- block_w
];
539 //FIXME precalculate this or simplify it somehow else
541 d
= -dst
[index
] + (1<<(FRAC_BITS
-1));
543 ab
+= (src
[x2
+ y2
*ref_stride
] - (d
>>FRAC_BITS
)) * obmc_v
;
544 aa
+= obmc_v
* obmc_v
; //FIXME precalculate this
550 return av_clip( ROUNDED_DIV(ab
<<LOG2_OBMC_MAX
, aa
), 0, 255); //FIXME we should not need clipping
// Approximate bit cost of the block at (x, y) in the block grid, where w is
// the block's width in grid units (used to locate the top-right neighbour).
// Intra blocks cost 3 + 2 * the sum of log2 terms of the colour differences
// against the left neighbour; other blocks cost 2 * (1 + log2 terms of dmx,
// dmy and the reference index).
// NOTE(review): the result of the out-of-range check and the lines that turn
// the pred_mv output into dmx/dmy are missing from this listing.
553 static inline int get_block_bits(SnowContext
*s
, int x
, int y
, int w
){
554 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
555 const int b_height
= s
->b_height
<< s
->block_max_depth
;
556 int index
= x
+ y
*b_stride
;
557 const BlockNode
*b
= &s
->block
[index
];
// Neighbours; positions outside the grid fall back to null_block / left / tl.
558 const BlockNode
*left
= x
? &s
->block
[index
-1] : &null_block
;
559 const BlockNode
*top
= y
? &s
->block
[index
-b_stride
] : &null_block
;
560 const BlockNode
*tl
= y
&& x
? &s
->block
[index
-b_stride
-1] : left
;
561 const BlockNode
*tr
= y
&& x
+w
<b_stride
? &s
->block
[index
-b_stride
+w
] : tl
;
563 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
564 // int my_context= av_log2(2*FFABS(left->my - top->my));
// Coordinates outside the grid are handled separately.
566 if(x
<0 || x
>=b_stride
|| y
>=b_height
)
575 //FIXME try accurate rate
576 //FIXME intra and inter predictors if surrounding blocks are not the same type
577 if(b
->type
& BLOCK_INTRA
){
578 return 3+2*( av_log2(2*FFABS(left
->color
[0] - b
->color
[0]))
579 + av_log2(2*FFABS(left
->color
[1] - b
->color
[1]))
580 + av_log2(2*FFABS(left
->color
[2] - b
->color
[2])));
582 pred_mv(s
, &dmx
, &dmy
, b
->ref
, left
, top
, tr
);
585 return 2*(1 + av_log2(2*FFABS(dmx
)) //FIXME kill the 2* can be merged in lambda
586 + av_log2(2*FFABS(dmy
))
587 + av_log2(2*b
->ref
));
// Rate-distortion score of block (mb_x, mb_y) in plane plane_index:
// returns distortion + rate * penalty_factor.
// The block prediction is rendered with ff_snow_pred_block, blended into the
// current picture using the obmc_edged weights and the scratchpad prediction,
// then compared with the input picture; rate is summed from get_block_bits
// over nearby blocks.
// NOTE(review): declarations (x, y, i, distortion, rate), several loops,
// else branches and the store of v are missing from this listing.
591 static int get_block_rd(SnowContext
*s
, int mb_x
, int mb_y
, int plane_index
, uint8_t (*obmc_edged
)[MB_SIZE
* 2]){
592 Plane
*p
= &s
->plane
[plane_index
];
593 const int block_size
= MB_SIZE
>> s
->block_max_depth
;
594 const int block_w
= plane_index
? block_size
>>s
->chroma_h_shift
: block_size
;
595 const int block_h
= plane_index
? block_size
>>s
->chroma_v_shift
: block_size
;
596 const int obmc_stride
= plane_index
? (2*block_size
)>>s
->chroma_h_shift
: 2*block_size
;
597 const int ref_stride
= s
->current_picture
->linesize
[plane_index
];
598 uint8_t *dst
= s
->current_picture
->data
[plane_index
];
599 uint8_t *src
= s
-> input_picture
->data
[plane_index
];
600 IDWTELEM
*pred
= (IDWTELEM
*)s
->m
.obmc_scratchpad
+ plane_index
*block_size
*block_size
*4;
601 uint8_t *cur
= s
->scratchbuf
;
602 uint8_t *tmp
= s
->emu_edge_buffer
;
603 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
604 const int b_height
= s
->b_height
<< s
->block_max_depth
;
605 const int w
= p
->width
;
606 const int h
= p
->height
;
609 const int penalty_factor
= get_penalty_factor(s
->lambda
, s
->lambda2
, s
->avctx
->me_cmp
);
// (sx, sy) is the top-left of the 2*block_w by 2*block_h window centred on
// the block; x0..x1 and y0..y1 are the parts of it that lie inside the plane.
610 int sx
= block_w
*mb_x
- block_w
/2;
611 int sy
= block_h
*mb_y
- block_h
/2;
612 int x0
= FFMAX(0,-sx
);
613 int y0
= FFMAX(0,-sy
);
614 int x1
= FFMIN(block_w
*2, w
-sx
);
615 int y1
= FFMIN(block_h
*2, h
-sy
);
618 av_assert2(s
->chroma_h_shift
== s
->chroma_v_shift
); //obmc and square assumptions below checking only block_w
620 ff_snow_pred_block(s
, cur
, tmp
, ref_stride
, sx
, sy
, block_w
*2, block_h
*2, &s
->block
[mb_x
+ mb_y
*b_stride
], plane_index
, w
, h
);
// Per pixel: weight this block's prediction, add the prediction from pred,
// shift down by FRAC_BITS and clamp the result to 0..255.
622 for(y
=y0
; y
<y1
; y
++){
623 const uint8_t *obmc1
= obmc_edged
[y
];
624 const IDWTELEM
*pred1
= pred
+ y
*obmc_stride
;
625 uint8_t *cur1
= cur
+ y
*ref_stride
;
626 uint8_t *dst1
= dst
+ sx
+ (sy
+y
)*ref_stride
;
627 for(x
=x0
; x
<x1
; x
++){
628 #if FRAC_BITS >= LOG2_OBMC_MAX
629 int v
= (cur1
[x
] * obmc1
[x
]) << (FRAC_BITS
- LOG2_OBMC_MAX
);
631 int v
= (cur1
[x
] * obmc1
[x
] + (1<<(LOG2_OBMC_MAX
- FRAC_BITS
-1))) >> (LOG2_OBMC_MAX
- FRAC_BITS
);
633 v
= (v
+ pred1
[x
]) >> FRAC_BITS
;
634 if(v
&(~255)) v
= ~(v
>>31);
// Corner blocks of the grid get their pixels copied straight from cur.
639 /* copy the regions where obmc[] = (uint8_t)256 */
640 if(LOG2_OBMC_MAX
== 8
641 && (mb_x
== 0 || mb_x
== b_stride
-1)
642 && (mb_y
== 0 || mb_y
== b_height
-1)){
652 memcpy(dst
+ sx
+x0
+ (sy
+y
)*ref_stride
, cur
+ x0
+ y
*ref_stride
, x1
-x0
);
656 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
657 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
658 /* FIXME cmps overlap but do not cover the wavelet's whole support.
659 * So improving the score of one block is not strictly guaranteed
660 * to improve the score of the whole frame, thus iterative motion
661 * estimation does not always converge. */
// Distortion: the wavelet comparison functions are called directly for
// FF_CMP_W97 / FF_CMP_W53; otherwise me_cmp[0] is used.
662 if(s
->avctx
->me_cmp
== FF_CMP_W97
)
663 distortion
= ff_w97_32_c(&s
->m
, src
+ sx
+ sy
*ref_stride
, dst
+ sx
+ sy
*ref_stride
, ref_stride
, 32);
664 else if(s
->avctx
->me_cmp
== FF_CMP_W53
)
665 distortion
= ff_w53_32_c(&s
->m
, src
+ sx
+ sy
*ref_stride
, dst
+ sx
+ sy
*ref_stride
, ref_stride
, 32);
669 int off
= sx
+16*(i
&1) + (sy
+16*(i
>>1))*ref_stride
;
670 distortion
+= s
->mecc
.me_cmp
[0](&s
->m
, src
+ off
, dst
+ off
, ref_stride
, 16);
674 av_assert2(block_w
==8);
675 distortion
= s
->mecc
.me_cmp
[0](&s
->m
, src
+ sx
+ sy
*ref_stride
, dst
+ sx
+ sy
*ref_stride
, ref_stride
, block_w
*2);
// Rate: bit estimates of nearby blocks, addressed through bits of i.
684 rate
+= get_block_bits(s
, mb_x
+ (i
&1) - (i
>>1), mb_y
+ (i
>>1), 1);
686 if(mb_x
== b_stride
-2)
687 rate
+= get_block_bits(s
, mb_x
+ 1, mb_y
+ 1, 1);
689 return distortion
+ rate
*penalty_factor
;
// Rate-distortion score for the group of blocks starting at (mb_x, mb_y):
// returns distortion + rate * penalty_factor.
// A 3x3 set of block positions around (mb_x, mb_y) is rendered with
// add_yblock into the current picture, pixels outside the plane are copied
// from the input so they do not count, and each block is compared against
// the input with me_cmp. Rate comes from get_block_bits.
// NOTE(review): declarations (i, y2, distortion, rate), the loop over i and
// several conditions are missing from this listing.
692 static int get_4block_rd(SnowContext
*s
, int mb_x
, int mb_y
, int plane_index
){
694 Plane
*p
= &s
->plane
[plane_index
];
695 const int block_size
= MB_SIZE
>> s
->block_max_depth
;
696 const int block_w
= plane_index
? block_size
>>s
->chroma_h_shift
: block_size
;
697 const int block_h
= plane_index
? block_size
>>s
->chroma_v_shift
: block_size
;
698 const uint8_t *obmc
= plane_index
? ff_obmc_tab
[s
->block_max_depth
+s
->chroma_h_shift
] : ff_obmc_tab
[s
->block_max_depth
];
699 const int obmc_stride
= plane_index
? (2*block_size
)>>s
->chroma_h_shift
: 2*block_size
;
700 const int ref_stride
= s
->current_picture
->linesize
[plane_index
];
701 uint8_t *dst
= s
->current_picture
->data
[plane_index
];
702 uint8_t *src
= s
-> input_picture
->data
[plane_index
];
703 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
704 // const has only been removed from zero_dst to suppress a warning
705 static IDWTELEM zero_dst
[4096]; //FIXME
706 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
707 const int w
= p
->width
;
708 const int h
= p
->height
;
711 const int penalty_factor
= get_penalty_factor(s
->lambda
, s
->lambda2
, s
->avctx
->me_cmp
);
713 av_assert2(s
->chroma_h_shift
== s
->chroma_v_shift
); //obmc and square assumptions below
// Block offsets of -1, 0 or +1 in each direction, selected by i%3 and i/3.
716 int mb_x2
= mb_x
+ (i
%3) - 1;
717 int mb_y2
= mb_y
+ (i
/3) - 1;
718 int x
= block_w
*mb_x2
+ block_w
/2;
719 int y
= block_h
*mb_y2
+ block_h
/2;
721 add_yblock(s
, 0, NULL
, zero_dst
, dst
, obmc
,
722 x
, y
, block_w
, block_h
, w
, h
, /*dst_stride*/0, ref_stride
, obmc_stride
, mb_x2
, mb_y2
, 1, 1, plane_index
);
724 //FIXME find a cleaner/simpler way to skip the outside stuff
// Rows above the plane and rows at or below h are copied from src to dst.
725 for(y2
= y
; y2
<0; y2
++)
726 memcpy(dst
+ x
+ y2
*ref_stride
, src
+ x
+ y2
*ref_stride
, block_w
);
727 for(y2
= h
; y2
<y
+block_h
; y2
++)
728 memcpy(dst
+ x
+ y2
*ref_stride
, src
+ x
+ y2
*ref_stride
, block_w
);
// The copy below covers -x bytes per row starting at column x.
// NOTE(review): the guarding condition is not visible here.
730 for(y2
= y
; y2
<y
+block_h
; y2
++)
731 memcpy(dst
+ x
+ y2
*ref_stride
, src
+ x
+ y2
*ref_stride
, -x
);
// The copy below covers the columns from w up to x+block_w.
// NOTE(review): the guarding condition is not visible here.
734 for(y2
= y
; y2
<y
+block_h
; y2
++)
735 memcpy(dst
+ w
+ y2
*ref_stride
, src
+ w
+ y2
*ref_stride
, x
+block_w
- w
);
// me_cmp index 1 is used for 8-wide blocks, index 0 for 16-wide blocks.
738 av_assert1(block_w
== 8 || block_w
==16);
739 distortion
+= s
->mecc
.me_cmp
[block_w
==8](&s
->m
, src
+ x
+ y
*ref_stride
, dst
+ x
+ y
*ref_stride
, ref_stride
, block_h
);
// merged is true when the four blocks of the group are all the same.
743 BlockNode
*b
= &s
->block
[mb_x
+mb_y
*b_stride
];
744 int merged
= same_block(b
,b
+1) && same_block(b
,b
+b_stride
) && same_block(b
,b
+b_stride
+1);
// When merged, the group is costed as one block of width 2 and the loop
// starts at entry 4 of dxy, skipping the four positions inside the group.
752 rate
= get_block_bits(s
, mb_x
, mb_y
, 2);
753 for(i
=merged
?4:0; i
<9; i
++){
754 static const int dxy
[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
755 rate
+= get_block_bits(s
, mb_x
+ dxy
[i
][0], mb_y
+ dxy
[i
][1], 1);
758 return distortion
+ rate
*penalty_factor
;
// Encodes the coefficients of subband b (read from src with the given stride)
// to the range coder s->c. Two passes over the coefficients are visible:
// the first stores run values in s->run_buffer, the second writes the run
// lengths, the zero/nonzero flags, magnitudes and signs.
// Logs "encoded frame too large" when fewer than w*40 bytes remain in the
// output buffer.
// NOTE(review): the x/y loops, several declarations (x, y, v, p, px, py,
// run, run_index, max_index), boundary conditions, the comment delimiters
// around the ll lines and all return statements are missing from this listing.
761 static int encode_subband_c0run(SnowContext
*s
, SubBand
*b
, const IDWTELEM
*src
, const IDWTELEM
*parent
, int stride
, int orientation
){
762 const int w
= b
->width
;
763 const int h
= b
->height
;
768 int *runs
= s
->run_buffer
;
// Pass 1: read the coefficient v and its context neighbours: left (l),
// top-left (lt), top (t), top-right (rt) and the parent-band sample (p).
775 int /*ll=0, */l
=0, lt
=0, t
=0, rt
=0;
776 v
= src
[x
+ y
*stride
];
779 t
= src
[x
+ (y
-1)*stride
];
781 lt
= src
[x
- 1 + (y
-1)*stride
];
784 rt
= src
[x
+ 1 + (y
-1)*stride
];
788 l
= src
[x
- 1 + y
*stride
];
790 if(orientation==1) ll= src[y + (x-2)*stride];
791 else ll= src[x - 2 + y*stride];
797 if(px
<b
->parent
->width
&& py
<b
->parent
->height
)
798 p
= parent
[px
+ py
*2*stride
];
// With every neighbour and the parent equal to zero, the position is
// handled through the run list.
800 if(!(/*ll|*/l
|lt
|t
|rt
|p
)){
802 runs
[run_index
++]= run
;
810 max_index
= run_index
;
811 runs
[run_index
++]= run
;
813 run
= runs
[run_index
++];
// The number of runs is written first, then the first run if there is one.
815 put_symbol2(&s
->c
, b
->state
[30], max_index
, 0);
816 if(run_index
<= max_index
)
817 put_symbol2(&s
->c
, b
->state
[1], run
, 3);
820 if(s
->c
.bytestream_end
- s
->c
.bytestream
< w
*40){
821 av_log(s
->avctx
, AV_LOG_ERROR
, "encoded frame too large\n");
// Pass 2: same neighbourhood as pass 1, now writing symbols.
826 int /*ll=0, */l
=0, lt
=0, t
=0, rt
=0;
827 v
= src
[x
+ y
*stride
];
830 t
= src
[x
+ (y
-1)*stride
];
832 lt
= src
[x
- 1 + (y
-1)*stride
];
835 rt
= src
[x
+ 1 + (y
-1)*stride
];
839 l
= src
[x
- 1 + y
*stride
];
841 if(orientation==1) ll= src[y + (x-2)*stride];
842 else ll= src[x - 2 + y*stride];
848 if(px
<b
->parent
->width
&& py
<b
->parent
->height
)
849 p
= parent
[px
+ py
*2*stride
];
// Nonzero context: a zero/nonzero flag for v is coded with a context
// derived from the weighted neighbour magnitudes.
851 if(/*ll|*/l
|lt
|t
|rt
|p
){
852 int context
= av_log2(/*FFABS(ll) + */3*FFABS(l
) + FFABS(lt
) + 2*FFABS(t
) + FFABS(rt
) + FFABS(p
));
854 put_rac(&s
->c
, &b
->state
[0][context
], !!v
);
// Zero context: the next run value is fetched and written while runs remain.
857 run
= runs
[run_index
++];
859 if(run_index
<= max_index
)
860 put_symbol2(&s
->c
, b
->state
[1], run
, 3);
// Coefficient value: |v|-1 is written with a context-dependent state, then
// the sign bit with a context built from the left and top neighbours.
868 int context
= av_log2(/*FFABS(ll) + */3*FFABS(l
) + FFABS(lt
) + 2*FFABS(t
) + FFABS(rt
) + FFABS(p
));
869 int l2
= 2*FFABS(l
) + (l
<0);
870 int t2
= 2*FFABS(t
) + (t
<0);
872 put_symbol2(&s
->c
, b
->state
[context
+ 2], FFABS(v
)-1, context
-4);
873 put_rac(&s
->c
, &b
->state
[0][16 + 1 + 3 + ff_quant3bA
[l2
&0xFF] + 3*ff_quant3bA
[t2
&0xFF]], v
<0);
// Subband encoding entry point: forwards all arguments unchanged to
// encode_subband_c0run and returns its result. The other coder variants
// named below are commented out.
881 static int encode_subband(SnowContext
*s
, SubBand
*b
, const IDWTELEM
*src
, const IDWTELEM
*parent
, int stride
, int orientation
){
882 // encode_subband_qtree(s, b, src, parent, stride, orientation);
883 // encode_subband_z0run(s, b, src, parent, stride, orientation);
884 return encode_subband_c0run(s
, b
, src
, parent
, stride
, orientation
);
885 // encode_subband_dzr(s, b, src, parent, stride, orientation);
// Tries a candidate for the block at (mb_x, mb_y) and scores it with
// get_block_rd on plane 0. Two variants are visible: one stores p[0..2] as
// colours and sets BLOCK_INTRA; the other consults s->me_cache with an
// index/value built from p[0], p[1] and block->ref, and clears BLOCK_INTRA.
// A copy of the block is taken in backup before it is modified.
// NOTE(review): the branch on `intra`, the stores of p[0]/p[1] into the mv
// fields, the comparison against *best_rd, the restore from backup and the
// return values are missing from this listing.
888 static av_always_inline
int check_block(SnowContext
*s
, int mb_x
, int mb_y
, int p
[3], int intra
, uint8_t (*obmc_edged
)[MB_SIZE
* 2], int *best_rd
){
889 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
890 BlockNode
*block
= &s
->block
[mb_x
+ mb_y
* b_stride
];
891 BlockNode backup
= *block
;
895 av_assert2(mb_x
>=0 && mb_y
>=0);
896 av_assert2(mb_x
<b_stride
);
// Candidate with colours p[0..2], marked intra.
899 block
->color
[0] = p
[0];
900 block
->color
[1] = p
[1];
901 block
->color
[2] = p
[2];
902 block
->type
|= BLOCK_INTRA
;
// Cache lookup: a matching entry means this candidate was already seen in
// the current cache generation; otherwise the entry is overwritten.
904 index
= (p
[0] + 31*p
[1]) & (ME_CACHE_SIZE
-1);
905 value
= s
->me_cache_generation
+ (p
[0]>>10) + (p
[1]<<6) + (block
->ref
<<12);
906 if(s
->me_cache
[index
] == value
)
908 s
->me_cache
[index
]= value
;
912 block
->type
&= ~BLOCK_INTRA
;
915 rd
= get_block_rd(s
, mb_x
, mb_y
, 0, obmc_edged
);
// Convenience wrapper: calls check_block with intra = 0, passing the array p
// and forwarding obmc_edged and best_rd unchanged.
// NOTE(review): the declaration of p (presumably built from p0 and p1) is
// missing from this listing.
927 /* special case for int[2] args we discard afterwards,
928 * fixes compilation problem with gcc 2.95 */
929 static av_always_inline
int check_block_inter(SnowContext
*s
, int mb_x
, int mb_y
, int p0
, int p1
, uint8_t (*obmc_edged
)[MB_SIZE
* 2], int *best_rd
){
931 return check_block(s
, mb_x
, mb_y
, p
, 0, obmc_edged
, best_rd
);
// Tries one inter candidate for the 2x2 group of blocks whose top-left block
// is (mb_x, mb_y); both coordinates are asserted to be even. The four blocks
// are saved in backup[], the candidate is looked up in s->me_cache, the
// top-left block is marked non-intra and copied over the other three, and
// the group is scored with get_4block_rd on plane 0.
// NOTE(review): the declaration of backup, the stores of p0/p1/ref into the
// block, the comparison against *best_rd, part of the restore and the return
// values are missing from this listing.
934 static av_always_inline
int check_4block_inter(SnowContext
*s
, int mb_x
, int mb_y
, int p0
, int p1
, int ref
, int *best_rd
){
935 const int b_stride
= s
->b_width
<< s
->block_max_depth
;
936 BlockNode
*block
= &s
->block
[mb_x
+ mb_y
* b_stride
];
941 /* We don't initialize backup[] during variable declaration, because
942 * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
944 backup
[0] = block
[0];
945 backup
[1] = block
[1];
946 backup
[2] = block
[b_stride
];
947 backup
[3] = block
[b_stride
+ 1];
949 av_assert2(mb_x
>=0 && mb_y
>=0);
950 av_assert2(mb_x
<b_stride
);
951 av_assert2(((mb_x
|mb_y
)&1) == 0);
// Cache lookup keyed on p0, p1 and block->ref within the current generation.
953 index
= (p0
+ 31*p1
) & (ME_CACHE_SIZE
-1);
954 value
= s
->me_cache_generation
+ (p0
>>10) + (p1
<<6) + (block
->ref
<<12);
955 if(s
->me_cache
[index
] == value
)
957 s
->me_cache
[index
]= value
;
// Make all four blocks of the group identical to the top-left one.
962 block
->type
&= ~BLOCK_INTRA
;
963 block
[1]= block
[b_stride
]= block
[b_stride
+1]= *block
;
965 rd
= get_4block_rd(s
, mb_x
, mb_y
, 0);
// Restore of the lower two blocks from the saved copies.
974 block
[b_stride
]= backup
[2];
975 block
[b_stride
+1]= backup
[3];
/*
 * iterative_me: iterative refinement of the per-block decisions in s->block[].
 *
 * Outline of the statements shown here:
 *  - encode_q_branch() is run over every block with s->block_state saved
 *    before and restored after;
 *  - up to 25 passes visit every block; per block the eight neighbours are
 *    looked up (NULL at the picture border), an OBMC weight table
 *    (obmc_edged) is copied from ff_obmc_tab and adjusted for border blocks,
 *    source pixels are copied into the current picture for border blocks,
 *    and candidates are evaluated through check_block() /
 *    check_block_inter();
 *  - when a block ends up different from its backup, BLOCK_OPT is cleared
 *    on all existing neighbours;
 *  - when s->block_max_depth == 1, 2x2 block groups are additionally tried
 *    with a shared candidate through check_4block_inter().
 */
980 static void iterative_me(SnowContext
*s
){
981 int pass
, mb_x
, mb_y
;
982 const int b_width
= s
->b_width
<< s
->block_max_depth
;
983 const int b_height
= s
->b_height
<< s
->block_max_depth
;
984 const int b_stride
= b_width
;
/* Run encode_q_branch() on every block; block_state is saved and restored around it. */
989 uint8_t state
[sizeof(s
->block_state
)];
990 memcpy(state
, s
->block_state
, sizeof(s
->block_state
));
991 for(mb_y
= 0; mb_y
<s
->b_height
; mb_y
++)
992 for(mb_x
= 0; mb_x
<s
->b_width
; mb_x
++)
993 encode_q_branch(s
, 0, mb_x
, mb_y
);
995 memcpy(s
->block_state
, state
, sizeof(s
->block_state
));
/* At most 25 refinement passes over all blocks. */
998 for(pass
=0; pass
<25; pass
++){
1001 for(mb_y
= 0; mb_y
<b_height
; mb_y
++){
1002 for(mb_x
= 0; mb_x
<b_width
; mb_x
++){
1003 int dia_change
, i
, j
, ref
;
1004 int best_rd
= INT_MAX
, ref_rd
;
1005 BlockNode backup
, ref_b
;
1006 const int index
= mb_x
+ mb_y
* b_stride
;
1007 BlockNode
*block
= &s
->block
[index
];
1008 BlockNode
*tb
= mb_y
? &s
->block
[index
-b_stride
] : NULL
;
1009 BlockNode
*lb
= mb_x
? &s
->block
[index
-1] : NULL
;
1010 BlockNode
*rb
= mb_x
+1<b_width
? &s
->block
[index
+1] : NULL
;
1011 BlockNode
*bb
= mb_y
+1<b_height
? &s
->block
[index
+b_stride
] : NULL
;
1012 BlockNode
*tlb
= mb_x
&& mb_y
? &s
->block
[index
-b_stride
-1] : NULL
;
1013 BlockNode
*trb
= mb_x
+1<b_width
&& mb_y
? &s
->block
[index
-b_stride
+1] : NULL
;
1014 BlockNode
*blb
= mb_x
&& mb_y
+1<b_height
? &s
->block
[index
+b_stride
-1] : NULL
;
1015 BlockNode
*brb
= mb_x
+1<b_width
&& mb_y
+1<b_height
? &s
->block
[index
+b_stride
+1] : NULL
;
1016 const int b_w
= (MB_SIZE
>> s
->block_max_depth
);
1017 uint8_t obmc_edged
[MB_SIZE
* 2][MB_SIZE
* 2];
1019 if(pass
&& (block
->type
& BLOCK_OPT
))
1021 block
->type
|= BLOCK_OPT
;
/* Start a new cache generation; the cache is cleared first when the counter reads zero. */
1025 if(!s
->me_cache_generation
)
1026 memset(s
->me_cache
, 0, sizeof(s
->me_cache
));
1027 s
->me_cache_generation
+= 1<<22;
1029 //FIXME precalculate
1032 for (y
= 0; y
< b_w
* 2; y
++)
1033 memcpy(obmc_edged
[y
], ff_obmc_tab
[s
->block_max_depth
] + y
* b_w
* 2, b_w
* 2);
1035 for(y
=0; y
<b_w
*2; y
++)
1036 memset(obmc_edged
[y
], obmc_edged
[y
][0] + obmc_edged
[y
][b_w
-1], b_w
);
1037 if(mb_x
==b_stride
-1)
1038 for(y
=0; y
<b_w
*2; y
++)
1039 memset(obmc_edged
[y
]+b_w
, obmc_edged
[y
][b_w
] + obmc_edged
[y
][b_w
*2-1], b_w
);
1041 for(x
=0; x
<b_w
*2; x
++)
1042 obmc_edged
[0][x
] += obmc_edged
[b_w
-1][x
];
1043 for(y
=1; y
<b_w
; y
++)
1044 memcpy(obmc_edged
[y
], obmc_edged
[0], b_w
*2);
1046 if(mb_y
==b_height
-1){
1047 for(x
=0; x
<b_w
*2; x
++)
1048 obmc_edged
[b_w
*2-1][x
] += obmc_edged
[b_w
][x
];
1049 for(y
=b_w
; y
<b_w
*2-1; y
++)
1050 memcpy(obmc_edged
[y
], obmc_edged
[b_w
*2-1], b_w
*2);
1054 //skip stuff outside the picture
1055 if(mb_x
==0 || mb_y
==0 || mb_x
==b_width
-1 || mb_y
==b_height
-1){
1056 uint8_t *src
= s
-> input_picture
->data
[0];
1057 uint8_t *dst
= s
->current_picture
->data
[0];
1058 const int stride
= s
->current_picture
->linesize
[0];
1059 const int block_w
= MB_SIZE
>> s
->block_max_depth
;
1060 const int block_h
= MB_SIZE
>> s
->block_max_depth
;
1061 const int sx
= block_w
*mb_x
- block_w
/2;
1062 const int sy
= block_h
*mb_y
- block_h
/2;
1063 const int w
= s
->plane
[0].width
;
1064 const int h
= s
->plane
[0].height
;
1068 memcpy(dst
+ sx
+ y
*stride
, src
+ sx
+ y
*stride
, block_w
*2);
1069 for(y
=h
; y
<sy
+block_h
*2; y
++)
1070 memcpy(dst
+ sx
+ y
*stride
, src
+ sx
+ y
*stride
, block_w
*2);
1072 for(y
=sy
; y
<sy
+block_h
*2; y
++)
1073 memcpy(dst
+ sx
+ y
*stride
, src
+ sx
+ y
*stride
, -sx
);
1075 if(sx
+block_w
*2 > w
){
1076 for(y
=sy
; y
<sy
+block_h
*2; y
++)
1077 memcpy(dst
+ w
+ y
*stride
, src
+ w
+ y
*stride
, sx
+block_w
*2 - w
);
1081 // intra(black) = neighbors' contribution to the current block
1082 for(i
=0; i
< s
->nb_planes
; i
++)
1083 color
[i
]= get_dc(s
, mb_x
, mb_y
, i
);
1085 // get previous score (cannot be cached due to OBMC)
1086 if(pass
> 0 && (block
->type
&BLOCK_INTRA
)){
1087 int color0
[3]= {block
->color
[0], block
->color
[1], block
->color
[2]};
1088 check_block(s
, mb_x
, mb_y
, color0
, 1, obmc_edged
, &best_rd
);
1090 check_block_inter(s
, mb_x
, mb_y
, block
->mx
, block
->my
, obmc_edged
, &best_rd
);
1094 for(ref
=0; ref
< s
->ref_frames
; ref
++){
1095 int16_t (*mvr
)[2]= &s
->ref_mvs
[ref
][index
];
1096 if(s
->ref_scores
[ref
][index
] > s
->ref_scores
[ref_b
.ref
][index
]*3/2) //FIXME tune threshold
1101 check_block_inter(s
, mb_x
, mb_y
, mvr
[0][0], mvr
[0][1], obmc_edged
, &best_rd
);
1102 check_block_inter(s
, mb_x
, mb_y
, 0, 0, obmc_edged
, &best_rd
);
1104 check_block_inter(s
, mb_x
, mb_y
, mvr
[-b_stride
][0], mvr
[-b_stride
][1], obmc_edged
, &best_rd
);
1106 check_block_inter(s
, mb_x
, mb_y
, mvr
[-1][0], mvr
[-1][1], obmc_edged
, &best_rd
);
1108 check_block_inter(s
, mb_x
, mb_y
, mvr
[1][0], mvr
[1][1], obmc_edged
, &best_rd
);
1110 check_block_inter(s
, mb_x
, mb_y
, mvr
[b_stride
][0], mvr
[b_stride
][1], obmc_edged
, &best_rd
);
1113 //FIXME avoid subpel interpolation / round to nearest integer
1116 for(i
=0; i
<FFMAX(s
->avctx
->dia_size
, 1); i
++){
1118 dia_change
|= check_block_inter(s
, mb_x
, mb_y
, block
->mx
+4*(i
-j
), block
->my
+(4*j
), obmc_edged
, &best_rd
);
1119 dia_change
|= check_block_inter(s
, mb_x
, mb_y
, block
->mx
-4*(i
-j
), block
->my
-(4*j
), obmc_edged
, &best_rd
);
1120 dia_change
|= check_block_inter(s
, mb_x
, mb_y
, block
->mx
+4*(i
-j
), block
->my
-(4*j
), obmc_edged
, &best_rd
);
1121 dia_change
|= check_block_inter(s
, mb_x
, mb_y
, block
->mx
-4*(i
-j
), block
->my
+(4*j
), obmc_edged
, &best_rd
);
1127 static const int square
[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
1130 dia_change
|= check_block_inter(s
, mb_x
, mb_y
, block
->mx
+square
[i
][0], block
->my
+square
[i
][1], obmc_edged
, &best_rd
);
1132 //FIXME or try the standard 2 pass qpel or similar
1134 mvr
[0][0]= block
->mx
;
1135 mvr
[0][1]= block
->my
;
1136 if(ref_rd
> best_rd
){
1143 check_block(s
, mb_x
, mb_y
, color
, 1, obmc_edged
, &best_rd
);
1144 //FIXME RD style color selection
/* The block changed: every existing neighbour loses its BLOCK_OPT flag. */
1145 if(!same_block(block
, &backup
)){
1146 if(tb
) tb
->type
&= ~BLOCK_OPT
;
1147 if(lb
) lb
->type
&= ~BLOCK_OPT
;
1148 if(rb
) rb
->type
&= ~BLOCK_OPT
;
1149 if(bb
) bb
->type
&= ~BLOCK_OPT
;
1150 if(tlb
) tlb
->type
&= ~BLOCK_OPT
;
1151 if(trb
) trb
->type
&= ~BLOCK_OPT
;
1152 if(blb
) blb
->type
&= ~BLOCK_OPT
;
1153 if(brb
) brb
->type
&= ~BLOCK_OPT
;
/* NOTE(review): this per-pass progress message is emitted at AV_LOG_ERROR although
 * it does not report a failure -- consider a debug/verbose level. */
1158 av_log(s
->avctx
, AV_LOG_ERROR
, "pass:%d changed:%d\n", pass
, change
);
/* With block_max_depth == 1, 2x2 groups (mb_x / mb_y advance by 2) are tried with one shared candidate. */
1163 if(s
->block_max_depth
== 1){
1165 for(mb_y
= 0; mb_y
<b_height
; mb_y
+=2){
1166 for(mb_x
= 0; mb_x
<b_width
; mb_x
+=2){
1168 int best_rd
, init_rd
;
1169 const int index
= mb_x
+ mb_y
* b_stride
;
1172 b
[0]= &s
->block
[index
];
1174 b
[2]= b
[0]+b_stride
;
1176 if(same_block(b
[0], b
[1]) &&
1177 same_block(b
[0], b
[2]) &&
1178 same_block(b
[0], b
[3]))
1181 if(!s
->me_cache_generation
)
1182 memset(s
->me_cache
, 0, sizeof(s
->me_cache
));
1183 s
->me_cache_generation
+= 1<<22;
1185 init_rd
= best_rd
= get_4block_rd(s
, mb_x
, mb_y
, 0);
1187 //FIXME more multiref search?
/* First candidate: rounded average of the four blocks' mx / my, reference 0. */
1188 check_4block_inter(s
, mb_x
, mb_y
,
1189 (b
[0]->mx
+ b
[1]->mx
+ b
[2]->mx
+ b
[3]->mx
+ 2) >> 2,
1190 (b
[0]->my
+ b
[1]->my
+ b
[2]->my
+ b
[3]->my
+ 2) >> 2, 0, &best_rd
);
1193 if(!(b
[i
]->type
&BLOCK_INTRA
))
1194 check_4block_inter(s
, mb_x
, mb_y
, b
[i
]->mx
, b
[i
]->my
, b
[i
]->ref
, &best_rd
);
1196 if(init_rd
!= best_rd
)
/* NOTE(review): progress message at AV_LOG_ERROR, as above. */
1200 av_log(s
->avctx
, AV_LOG_ERROR
, "pass:4mv changed:%d\n", change
*4);
/*
 * encode_blocks: write the block information for all blocks.
 *
 * Statements shown here: a step guarded by
 * (me_method == ME_ITER && !s->keyframe && search); a check that fewer than
 * w*MB_SIZE*MB_SIZE*3 bytes remain in the range coder's output buffer, which
 * logs "encoded frame too large"; and per block either encode_q_branch2()
 * (when me_method == ME_ITER or `search` is 0) or encode_q_branch().
 * The guarded step is presumably the iterative search -- TODO confirm.
 */
1204 static void encode_blocks(SnowContext
*s
, int search
){
1209 if(s
->avctx
->me_method
== ME_ITER
&& !s
->keyframe
&& search
)
1213 if(s
->c
.bytestream_end
- s
->c
.bytestream
< w
*MB_SIZE
*MB_SIZE
*3){ //FIXME nicer limit
1214 av_log(s
->avctx
, AV_LOG_ERROR
, "encoded frame too large\n");
1218 if(s
->avctx
->me_method
== ME_ITER
|| !search
)
1219 encode_q_branch2(s
, 0, x
, y
);
1221 encode_q_branch (s
, 0, x
, y
);
/*
 * quantize: quantize the coefficients of subband b from src into dst.
 *
 * The step size qmul is looked up in ff_qexp[] from the clipped sum
 * s->qlog + b->qlog and scaled up by ENCODER_EXTRA_BITS.
 * When s->qlog == LOSSLESS_QLOG the coefficients are copied unchanged.
 * Otherwise `bias` is replaced by 0 when the caller passed a non-zero value
 * and by (3*qmul)>>3 when it passed 0; coefficients for which
 * (unsigned)(i+thres1) > thres2 are divided by qmul (one variant adds bias
 * first) and stored as i or -i, and 0 is stored elsewhere.
 * Presumably the i / -i stores correspond to the sign of the input and the 0
 * store to the dead zone -- TODO confirm.
 */
1226 static void quantize(SnowContext
*s
, SubBand
*b
, IDWTELEM
*dst
, DWTELEM
*src
, int stride
, int bias
){
1227 const int w
= b
->width
;
1228 const int h
= b
->height
;
1229 const int qlog
= av_clip(s
->qlog
+ b
->qlog
, 0, QROOT
*16);
1230 const int qmul
= ff_qexp
[qlog
&(QROOT
-1)]<<((qlog
>>QSHIFT
) + ENCODER_EXTRA_BITS
);
1231 int x
,y
, thres1
, thres2
;
/* Lossless mode: plain copy. */
1233 if(s
->qlog
== LOSSLESS_QLOG
){
1236 dst
[x
+ y
*stride
]= src
[x
+ y
*stride
];
1240 bias
= bias
? 0 : (3*qmul
)>>3;
1241 thres1
= ((qmul
- bias
)>>QEXPSHIFT
) - 1;
/* Variant without rounding offset. */
1247 int i
= src
[x
+ y
*stride
];
1249 if((unsigned)(i
+thres1
) > thres2
){
1252 i
/= qmul
; //FIXME optimize
1253 dst
[x
+ y
*stride
]= i
;
1257 i
/= qmul
; //FIXME optimize
1258 dst
[x
+ y
*stride
]= -i
;
1261 dst
[x
+ y
*stride
]= 0;
/* Variant that adds `bias` before dividing. */
1267 int i
= src
[x
+ y
*stride
];
1269 if((unsigned)(i
+thres1
) > thres2
){
1272 i
= (i
+ bias
) / qmul
; //FIXME optimize
1273 dst
[x
+ y
*stride
]= i
;
1277 i
= (i
+ bias
) / qmul
; //FIXME optimize
1278 dst
[x
+ y
*stride
]= -i
;
1281 dst
[x
+ y
*stride
]= 0;
/*
 * dequantize: in-place inverse quantization of subband b in src.
 *
 * qmul comes from ff_qexp[] using the clipped sum s->qlog + b->qlog;
 * qadd = (s->qbias * qmul) >> QBIAS_SHIFT.
 * Returns immediately when s->qlog == LOSSLESS_QLOG.
 * Two reconstruction forms are shown: -((-i*qmul + qadd) >> QEXPSHIFT) and
 * ((i*qmul + qadd) >> QEXPSHIFT); presumably the first is used for negative
 * and the second for positive coefficients -- TODO confirm.
 */
1287 static void dequantize(SnowContext
*s
, SubBand
*b
, IDWTELEM
*src
, int stride
){
1288 const int w
= b
->width
;
1289 const int h
= b
->height
;
1290 const int qlog
= av_clip(s
->qlog
+ b
->qlog
, 0, QROOT
*16);
1291 const int qmul
= ff_qexp
[qlog
&(QROOT
-1)]<<(qlog
>>QSHIFT
);
1292 const int qadd
= (s
->qbias
*qmul
)>>QBIAS_SHIFT
;
1295 if(s
->qlog
== LOSSLESS_QLOG
) return;
1299 int i
= src
[x
+ y
*stride
];
1301 src
[x
+ y
*stride
]= -((-i
*qmul
+ qadd
)>>(QEXPSHIFT
)); //FIXME try different bias
1303 src
[x
+ y
*stride
]= (( i
*qmul
+ qadd
)>>(QEXPSHIFT
));
/*
 * decorrelate: in-place spatial prediction over subband b; each sample has
 * a predictor built from already-visited neighbours subtracted from it.
 *
 * Samples are visited from the bottom-right corner backwards (y from h-1
 * down to 0, x from w-1 down to 0). Predictors shown here:
 *  - mid_pred(left, top, top-right) when y != 0 and x+1 < w, else left;
 *  - mid_pred(left, top, left + top - top-left) when y != 0, else left;
 *  - top when y != 0.
 * Presumably `use_median` chooses between the first two forms and the last
 * one handles x == 0 -- TODO confirm.
 */
1309 static void decorrelate(SnowContext
*s
, SubBand
*b
, IDWTELEM
*src
, int stride
, int inverse
, int use_median
){
1310 const int w
= b
->width
;
1311 const int h
= b
->height
;
1314 for(y
=h
-1; y
>=0; y
--){
1315 for(x
=w
-1; x
>=0; x
--){
1316 int i
= x
+ y
*stride
;
1320 if(y
&& x
+1<w
) src
[i
] -= mid_pred(src
[i
- 1], src
[i
- stride
], src
[i
- stride
+ 1]);
1321 else src
[i
] -= src
[i
- 1];
1323 if(y
) src
[i
] -= mid_pred(src
[i
- 1], src
[i
- stride
], src
[i
- 1] + src
[i
- stride
] - src
[i
- 1 - stride
]);
1324 else src
[i
] -= src
[i
- 1];
1327 if(y
) src
[i
] -= src
[i
- stride
];
/*
 * correlate: counterpart of decorrelate(); the same neighbour-based
 * predictors are added back to each sample of subband b in place.
 *
 * Predictors shown here:
 *  - mid_pred(left, top, top-right) when y != 0 and x+1 < w, else left;
 *  - mid_pred(left, top, left + top - top-left) when y != 0, else left;
 *  - top when y != 0.
 * Presumably `use_median` chooses between the first two forms -- TODO confirm.
 */
1333 static void correlate(SnowContext
*s
, SubBand
*b
, IDWTELEM
*src
, int stride
, int inverse
, int use_median
){
1334 const int w
= b
->width
;
1335 const int h
= b
->height
;
1340 int i
= x
+ y
*stride
;
1344 if(y
&& x
+1<w
) src
[i
] += mid_pred(src
[i
- 1], src
[i
- stride
], src
[i
- stride
+ 1]);
1345 else src
[i
] += src
[i
- 1];
1347 if(y
) src
[i
] += mid_pred(src
[i
- 1], src
[i
- stride
], src
[i
- 1] + src
[i
- stride
] - src
[i
- 1 - stride
]);
1348 else src
[i
] += src
[i
- 1];
1351 if(y
) src
[i
] += src
[i
- stride
];
1357 static void encode_qlogs(SnowContext
*s
){
1358 int plane_index
, level
, orientation
;
1360 for(plane_index
=0; plane_index
<FFMIN(s
->nb_planes
, 2); plane_index
++){
1361 for(level
=0; level
<s
->spatial_decomposition_count
; level
++){
1362 for(orientation
=level
? 1:0; orientation
<4; orientation
++){
1363 if(orientation
==2) continue;
1364 put_symbol(&s
->c
, s
->header_state
, s
->plane
[plane_index
].band
[level
][orientation
].qlog
, 1);
/*
 * encode_header: write the frame header through the range coder.
 *
 * Statements shown here:
 *  - the keyframe flag is coded with a private state (kstate) initialised
 *    to MID_STATE;
 *  - on a keyframe or when s->always_reset is set, the contexts are reset
 *    and the last_* reference values are cleared;
 *  - a group of stream-level fields is written: version, always_reset,
 *    temporal/spatial decomposition settings, colorspace_type, chroma
 *    shifts (only when nb_planes > 2), spatial_scalability,
 *    max_ref_frames-1;
 *  - an update_mc flag is derived by comparing htaps / diag_mc / hcoeff
 *    with their last_* copies, followed by the per-plane filter
 *    description;
 *  - a flag plus value for a changed spatial_decomposition_count;
 *  - signed deltas against the last_* values for
 *    spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth.
 * Presumably the stream-level group is written on keyframes only --
 * TODO confirm.
 */
1370 static void encode_header(SnowContext
*s
){
1374 memset(kstate
, MID_STATE
, sizeof(kstate
));
1376 put_rac(&s
->c
, kstate
, s
->keyframe
);
1377 if(s
->keyframe
|| s
->always_reset
){
1378 ff_snow_reset_contexts(s
);
1379 s
->last_spatial_decomposition_type
=
1383 s
->last_block_max_depth
= 0;
/* NOTE(review): fixed bound of 2 here, while the loops further down use
 * FFMIN(s->nb_planes, 2) -- confirm this is intended for single-plane input. */
1384 for(plane_index
=0; plane_index
<2; plane_index
++){
1385 Plane
*p
= &s
->plane
[plane_index
];
1388 memset(p
->last_hcoeff
, 0, sizeof(p
->last_hcoeff
));
1392 put_symbol(&s
->c
, s
->header_state
, s
->version
, 0);
1393 put_rac(&s
->c
, s
->header_state
, s
->always_reset
);
1394 put_symbol(&s
->c
, s
->header_state
, s
->temporal_decomposition_type
, 0);
1395 put_symbol(&s
->c
, s
->header_state
, s
->temporal_decomposition_count
, 0);
1396 put_symbol(&s
->c
, s
->header_state
, s
->spatial_decomposition_count
, 0);
1397 put_symbol(&s
->c
, s
->header_state
, s
->colorspace_type
, 0);
1398 if (s
->nb_planes
> 2) {
1399 put_symbol(&s
->c
, s
->header_state
, s
->chroma_h_shift
, 0);
1400 put_symbol(&s
->c
, s
->header_state
, s
->chroma_v_shift
, 0);
1402 put_rac(&s
->c
, s
->header_state
, s
->spatial_scalability
);
1403 // put_rac(&s->c, s->header_state, s->rate_scalability);
1404 put_symbol(&s
->c
, s
->header_state
, s
->max_ref_frames
-1, 0);
/* update_mc becomes non-zero when any coded plane's filter settings differ from the last written ones. */
1411 for(plane_index
=0; plane_index
<FFMIN(s
->nb_planes
, 2); plane_index
++){
1412 Plane
*p
= &s
->plane
[plane_index
];
1413 update_mc
|= p
->last_htaps
!= p
->htaps
;
1414 update_mc
|= p
->last_diag_mc
!= p
->diag_mc
;
1415 update_mc
|= !!memcmp(p
->last_hcoeff
, p
->hcoeff
, sizeof(p
->hcoeff
));
1417 put_rac(&s
->c
, s
->header_state
, update_mc
);
1419 for(plane_index
=0; plane_index
<FFMIN(s
->nb_planes
, 2); plane_index
++){
1420 Plane
*p
= &s
->plane
[plane_index
];
1421 put_rac(&s
->c
, s
->header_state
, p
->diag_mc
);
1422 put_symbol(&s
->c
, s
->header_state
, p
->htaps
/2-1, 0);
/* Coefficients are written from index htaps/2 down to 1, as absolute values. */
1423 for(i
= p
->htaps
/2; i
; i
--)
1424 put_symbol(&s
->c
, s
->header_state
, FFABS(p
->hcoeff
[i
]), 0);
1427 if(s
->last_spatial_decomposition_count
!= s
->spatial_decomposition_count
){
1428 put_rac(&s
->c
, s
->header_state
, 1);
1429 put_symbol(&s
->c
, s
->header_state
, s
->spatial_decomposition_count
, 0);
1432 put_rac(&s
->c
, s
->header_state
, 0);
/* Remaining fields are coded as signed differences against the previous header. */
1435 put_symbol(&s
->c
, s
->header_state
, s
->spatial_decomposition_type
- s
->last_spatial_decomposition_type
, 1);
1436 put_symbol(&s
->c
, s
->header_state
, s
->qlog
- s
->last_qlog
, 1);
1437 put_symbol(&s
->c
, s
->header_state
, s
->mv_scale
- s
->last_mv_scale
, 1);
1438 put_symbol(&s
->c
, s
->header_state
, s
->qbias
- s
->last_qbias
, 1);
1439 put_symbol(&s
->c
, s
->header_state
, s
->block_max_depth
- s
->last_block_max_depth
, 1);
/*
 * update_last_header_values: copy the current header parameters into their
 * last_* counterparts.
 *
 * Per plane (indices 0 and 1): diag_mc, htaps and hcoeff[].
 * Per context: spatial_decomposition_type, qlog, qbias, mv_scale,
 * block_max_depth and spatial_decomposition_count.
 */
1443 static void update_last_header_values(SnowContext
*s
){
1447 for(plane_index
=0; plane_index
<2; plane_index
++){
1448 Plane
*p
= &s
->plane
[plane_index
];
1449 p
->last_diag_mc
= p
->diag_mc
;
1450 p
->last_htaps
= p
->htaps
;
1451 memcpy(p
->last_hcoeff
, p
->hcoeff
, sizeof(p
->hcoeff
));
1455 s
->last_spatial_decomposition_type
= s
->spatial_decomposition_type
;
1456 s
->last_qlog
= s
->qlog
;
1457 s
->last_qbias
= s
->qbias
;
1458 s
->last_mv_scale
= s
->mv_scale
;
1459 s
->last_block_max_depth
= s
->block_max_depth
;
1460 s
->last_spatial_decomposition_count
= s
->spatial_decomposition_count
;
1463 static int qscale2qlog(int qscale
){
1464 return rint(QROOT
*log2(qscale
/ (float)FF_QP2LAMBDA
))
1465 + 61*QROOT
/8; ///< 64 > 60
/*
 * ratecontrol_1pass: single-pass rate control step for one frame.
 *
 * Statements shown here:
 *  - for every subband of plane 0, b->buf is copied into b->ibuf,
 *    decorrelate() is applied, and |coefficient| * qdiv >> 16 is summed
 *    into coef_sum (qdiv = 65536 / qmul, with qmul derived from
 *    2*QROOT + b->qlog);
 *  - coef_sum is asserted to be below INT_MAX and then squared and shifted
 *    right by 16;
 *  - the result is stored as mb_var_sum for I pictures and as
 *    mc_mb_var_sum otherwise, the other field being set to 0;
 *  - pict->quality is obtained from ff_rate_estimate_qscale(&s->m, 1);
 *    s->lambda is set to quality*3/2 and s->qlog is advanced by delta_qlog.
 * The return value is not shown; presumably delta_qlog, with a sentinel
 * when the estimated quality is negative -- TODO confirm.
 */
1468 static int ratecontrol_1pass(SnowContext
*s
, AVFrame
*pict
)
1470 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
1471 * FIXME we know exact mv bits at this point,
1472 * but ratecontrol isn't set up to include them. */
1473 uint32_t coef_sum
= 0;
1474 int level
, orientation
, delta_qlog
;
1476 for(level
=0; level
<s
->spatial_decomposition_count
; level
++){
1477 for(orientation
=level
? 1 : 0; orientation
<4; orientation
++){
1478 SubBand
*b
= &s
->plane
[0].band
[level
][orientation
];
1479 IDWTELEM
*buf
= b
->ibuf
;
1480 const int w
= b
->width
;
1481 const int h
= b
->height
;
1482 const int stride
= b
->stride
;
1483 const int qlog
= av_clip(2*QROOT
+ b
->qlog
, 0, QROOT
*16);
1484 const int qmul
= ff_qexp
[qlog
&(QROOT
-1)]<<(qlog
>>QSHIFT
);
1485 const int qdiv
= (1<<16)/qmul
;
1487 //FIXME this is ugly
1490 buf
[x
+y
*stride
]= b
->buf
[x
+y
*stride
];
1492 decorrelate(s
, b
, buf
, stride
, 1, 0);
1495 coef_sum
+= abs(buf
[x
+y
*stride
]) * qdiv
>> 16;
1499 /* ugly, ratecontrol just takes a sqrt again */
1500 av_assert0(coef_sum
< INT_MAX
);
1501 coef_sum
= (uint64_t)coef_sum
* coef_sum
>> 16;
1503 if(pict
->pict_type
== AV_PICTURE_TYPE_I
){
1504 s
->m
.current_picture
.mb_var_sum
= coef_sum
;
1505 s
->m
.current_picture
.mc_mb_var_sum
= 0;
1507 s
->m
.current_picture
.mc_mb_var_sum
= coef_sum
;
1508 s
->m
.current_picture
.mb_var_sum
= 0;
1511 pict
->quality
= ff_rate_estimate_qscale(&s
->m
, 1);
1512 if (pict
->quality
< 0)
1514 s
->lambda
= pict
->quality
* 3/2;
1515 delta_qlog
= qscale2qlog(pict
->quality
) - s
->qlog
;
1516 s
->qlog
+= delta_qlog
;
/*
 * calculate_visual_weight: derive b->qlog for every subband of plane p.
 *
 * Per subband, as shown here: s->spatial_idwt_buffer is zeroed over
 * width*height elements, the value 256*16 is written at the centre of the
 * band (b->width/2, b->height/2), ff_spatial_idwt() is run over the whole
 * plane, each output sample is scaled by 16 into `d`, and finally
 *     b->qlog = (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT)) + 0.5).
 * `error` is presumably accumulated from the d values -- TODO confirm.
 */
1520 static void calculate_visual_weight(SnowContext
*s
, Plane
*p
){
1521 int width
= p
->width
;
1522 int height
= p
->height
;
1523 int level
, orientation
, x
, y
;
1525 for(level
=0; level
<s
->spatial_decomposition_count
; level
++){
1526 for(orientation
=level
? 1 : 0; orientation
<4; orientation
++){
1527 SubBand
*b
= &p
->band
[level
][orientation
];
1528 IDWTELEM
*ibuf
= b
->ibuf
;
1531 memset(s
->spatial_idwt_buffer
, 0, sizeof(*s
->spatial_idwt_buffer
)*width
*height
);
1532 ibuf
[b
->width
/2 + b
->height
/2*b
->stride
]= 256*16;
1533 ff_spatial_idwt(s
->spatial_idwt_buffer
, s
->temp_idwt_buffer
, width
, height
, width
, s
->spatial_decomposition_type
, s
->spatial_decomposition_count
);
1534 for(y
=0; y
<height
; y
++){
1535 for(x
=0; x
<width
; x
++){
1536 int64_t d
= s
->spatial_idwt_buffer
[x
+ y
*width
]*16;
1541 b
->qlog
= (int)(log(352256.0/sqrt(error
)) / log(pow(2.0, 1.0/QROOT
))+0.5);
/*
 * encode_frame: encode one input frame `pict` into packet `pkt`.
 *
 * Outline of the statements shown here:
 *  - allocate the packet and initialise the range coder on it;
 *  - copy the input planes into s->input_picture and extend their edges;
 *  - choose picture type / keyframe and quality (two-pass data when
 *    CODEC_FLAG_PASS2 is set, GOP position otherwise), derive s->qlog and
 *    s->lambda;
 *  - extend the edges of the current picture, start the new frame, and set
 *    up the embedded MpegEncContext (s->m) for P pictures;
 *  - save header/block coder states, pick spatial_decomposition_count
 *    (starting at 5 and lowered while the chroma-shifted dimensions vanish),
 *    recompute visual weights when the count changed;
 *  - encode the block data, then per plane: build the residual, run the
 *    forward DWT, optionally run 1-pass rate control (which restarts the
 *    range coder and re-encodes the blocks), quantize / decorrelate /
 *    encode / correlate each subband, dequantize, inverse DWT and
 *    reconstruct; optionally accumulate a squared-difference error when
 *    CODEC_FLAG_PSNR is set;
 *  - update the last_* header values, statistics and rate-control state,
 *    terminate the range coder and flag key packets.
 */
1546 static int encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
1547 const AVFrame
*pict
, int *got_packet
)
1549 SnowContext
*s
= avctx
->priv_data
;
1550 RangeCoder
* const c
= &s
->c
;
/* NOTE(review): `pict` is declared const AVFrame *, yet it is assigned to a
 * non-const pointer here and pic->pict_type / pic->quality are written further
 * down, i.e. the caller's frame is modified. */
1551 AVFrame
*pic
= pict
;
1552 const int width
= s
->avctx
->width
;
1553 const int height
= s
->avctx
->height
;
1554 int level
, orientation
, plane_index
, i
, y
, ret
;
1555 uint8_t rc_header_bak
[sizeof(s
->header_state
)];
1556 uint8_t rc_block_bak
[sizeof(s
->block_state
)];
/* NOTE(review): the size expression is evaluated in int -- confirm the
 * dimension limits keep it from overflowing. */
1558 if ((ret
= ff_alloc_packet2(avctx
, pkt
, s
->b_width
*s
->b_height
*MB_SIZE
*MB_SIZE
*3 + FF_MIN_BUFFER_SIZE
)) < 0)
1561 ff_init_range_encoder(c
, pkt
->data
, pkt
->size
);
1562 ff_build_rac_states(c
, 0.05*(1LL<<32), 256-8);
/* Copy each input plane row by row into s->input_picture, then extend its edges. */
1564 for(i
=0; i
< s
->nb_planes
; i
++){
1565 int hshift
= i
? s
->chroma_h_shift
: 0;
1566 int vshift
= i
? s
->chroma_v_shift
: 0;
1567 for(y
=0; y
<(height
>>vshift
); y
++)
1568 memcpy(&s
->input_picture
->data
[i
][y
* s
->input_picture
->linesize
[i
]],
1569 &pict
->data
[i
][y
* pict
->linesize
[i
]],
1571 s
->mpvencdsp
.draw_edges(s
->input_picture
->data
[i
], s
->input_picture
->linesize
[i
],
1572 width
>> hshift
, height
>> vshift
,
1573 EDGE_WIDTH
>> hshift
, EDGE_WIDTH
>> vshift
,
1574 EDGE_TOP
| EDGE_BOTTOM
);
1578 s
->new_picture
= pict
;
1580 s
->m
.picture_number
= avctx
->frame_number
;
1581 if(avctx
->flags
&CODEC_FLAG_PASS2
){
1582 s
->m
.pict_type
= pic
->pict_type
= s
->m
.rc_context
.entry
[avctx
->frame_number
].new_pict_type
;
1583 s
->keyframe
= pic
->pict_type
== AV_PICTURE_TYPE_I
;
1584 if(!(avctx
->flags
&CODEC_FLAG_QSCALE
)) {
1585 pic
->quality
= ff_rate_estimate_qscale(&s
->m
, 0);
1586 if (pic
->quality
< 0)
/* Without two-pass data: keyframe when gop_size is 0 or on every gop_size-th frame. */
1590 s
->keyframe
= avctx
->gop_size
==0 || avctx
->frame_number
% avctx
->gop_size
== 0;
1591 s
->m
.pict_type
= pic
->pict_type
= s
->keyframe
? AV_PICTURE_TYPE_I
: AV_PICTURE_TYPE_P
;
1594 if(s
->pass1_rc
&& avctx
->frame_number
== 0)
1595 pic
->quality
= 2*FF_QP2LAMBDA
;
1597 s
->qlog
= qscale2qlog(pic
->quality
);
1598 s
->lambda
= pic
->quality
* 3/2;
/* A negative qlog, or quality 0 together with CODEC_FLAG_QSCALE, selects lossless coding. */
1600 if (s
->qlog
< 0 || (!pic
->quality
&& (avctx
->flags
& CODEC_FLAG_QSCALE
))) {
1601 s
->qlog
= LOSSLESS_QLOG
;
1603 }//else keep previous frame's qlog until after motion estimation
1605 if (s
->current_picture
->data
[0] && !(s
->avctx
->flags
&CODEC_FLAG_EMU_EDGE
)) {
1606 int w
= s
->avctx
->width
;
1607 int h
= s
->avctx
->height
;
1609 s
->mpvencdsp
.draw_edges(s
->current_picture
->data
[0],
1610 s
->current_picture
->linesize
[0], w
, h
,
1611 EDGE_WIDTH
, EDGE_WIDTH
, EDGE_TOP
| EDGE_BOTTOM
);
1612 if (s
->current_picture
->data
[2]) {
1613 s
->mpvencdsp
.draw_edges(s
->current_picture
->data
[1],
1614 s
->current_picture
->linesize
[1], w
>>s
->chroma_h_shift
, h
>>s
->chroma_v_shift
,
1615 EDGE_WIDTH
>>s
->chroma_h_shift
, EDGE_WIDTH
>>s
->chroma_v_shift
, EDGE_TOP
| EDGE_BOTTOM
);
1616 s
->mpvencdsp
.draw_edges(s
->current_picture
->data
[2],
1617 s
->current_picture
->linesize
[2], w
>>s
->chroma_h_shift
, h
>>s
->chroma_v_shift
,
1618 EDGE_WIDTH
>>s
->chroma_h_shift
, EDGE_WIDTH
>>s
->chroma_v_shift
, EDGE_TOP
| EDGE_BOTTOM
);
1622 ff_snow_frame_start(s
);
1623 avctx
->coded_frame
= s
->current_picture
;
1625 s
->m
.current_picture_ptr
= &s
->m
.current_picture
;
1626 s
->m
.current_picture
.f
= s
->current_picture
;
1627 s
->m
.current_picture
.f
->pts
= pict
->pts
;
/* P pictures: fill in the MpegEncContext fields that are set below. */
1628 if(pic
->pict_type
== AV_PICTURE_TYPE_P
){
1629 int block_width
= (width
+15)>>4;
1630 int block_height
= (height
+15)>>4;
1631 int stride
= s
->current_picture
->linesize
[0];
1633 av_assert0(s
->current_picture
->data
[0]);
1634 av_assert0(s
->last_picture
[0]->data
[0]);
1636 s
->m
.avctx
= s
->avctx
;
1637 s
->m
. last_picture
.f
= s
->last_picture
[0];
1638 s
->m
. new_picture
.f
= s
->input_picture
;
1639 s
->m
. last_picture_ptr
= &s
->m
. last_picture
;
1640 s
->m
.linesize
= stride
;
1641 s
->m
.uvlinesize
= s
->current_picture
->linesize
[1];
1643 s
->m
.height
= height
;
1644 s
->m
.mb_width
= block_width
;
1645 s
->m
.mb_height
= block_height
;
1646 s
->m
.mb_stride
= s
->m
.mb_width
+1;
1647 s
->m
.b8_stride
= 2*s
->m
.mb_width
+1;
1649 s
->m
.pict_type
= pic
->pict_type
;
1650 s
->m
.me_method
= s
->avctx
->me_method
;
1651 s
->m
.me
.scene_change_score
=0;
1652 s
->m
.flags
= s
->avctx
->flags
;
1653 s
->m
.quarter_sample
= (s
->avctx
->flags
& CODEC_FLAG_QPEL
)!=0;
1654 s
->m
.out_format
= FMT_H263
;
1655 s
->m
.unrestricted_mv
= 1;
1657 s
->m
.lambda
= s
->lambda
;
1658 s
->m
.qscale
= (s
->m
.lambda
*139 + FF_LAMBDA_SCALE
*64) >> (FF_LAMBDA_SHIFT
+ 7);
1659 s
->lambda2
= s
->m
.lambda2
= (s
->m
.lambda
*s
->m
.lambda
+ FF_LAMBDA_SCALE
/2) >> FF_LAMBDA_SHIFT
;
1661 s
->m
.mecc
= s
->mecc
; //move
1662 s
->m
.qdsp
= s
->qdsp
; //move
1663 s
->m
.hdsp
= s
->hdsp
;
1665 s
->hdsp
= s
->m
.hdsp
;
/* Keep copies of the coder states so they can be restored if the frame is re-encoded. */
1670 memcpy(rc_header_bak
, s
->header_state
, sizeof(s
->header_state
));
1671 memcpy(rc_block_bak
, s
->block_state
, sizeof(s
->block_state
));
1676 s
->spatial_decomposition_count
= 5;
1678 while( !(width
>>(s
->chroma_h_shift
+ s
->spatial_decomposition_count
))
1679 || !(height
>>(s
->chroma_v_shift
+ s
->spatial_decomposition_count
)))
1680 s
->spatial_decomposition_count
--;
1682 if (s
->spatial_decomposition_count
<= 0) {
1683 av_log(avctx
, AV_LOG_ERROR
, "Resolution too low\n");
1684 return AVERROR(EINVAL
);
1687 s
->m
.pict_type
= pic
->pict_type
;
1688 s
->qbias
= pic
->pict_type
== AV_PICTURE_TYPE_P
? 2 : 0;
1690 ff_snow_common_init_after_header(avctx
);
1692 if(s
->last_spatial_decomposition_count
!= s
->spatial_decomposition_count
){
1693 for(plane_index
=0; plane_index
< s
->nb_planes
; plane_index
++){
1694 calculate_visual_weight(s
, &s
->plane
[plane_index
]);
/* misc_bits = bits written so far; mv_bits = bits added by encode_blocks(). */
1699 s
->m
.misc_bits
= 8*(s
->c
.bytestream
- s
->c
.bytestream_start
);
1700 encode_blocks(s
, 1);
1701 s
->m
.mv_bits
= 8*(s
->c
.bytestream
- s
->c
.bytestream_start
) - s
->m
.misc_bits
;
1703 for(plane_index
=0; plane_index
< s
->nb_planes
; plane_index
++){
1704 Plane
*p
= &s
->plane
[plane_index
];
1708 // int bits= put_bits_count(&s->c.pb);
1710 if (!s
->memc_only
) {
1712 if(pict
->data
[plane_index
]) //FIXME gray hack
1715 s
->spatial_idwt_buffer
[y
*w
+ x
]= pict
->data
[plane_index
][y
*pict
->linesize
[plane_index
] + x
]<<FRAC_BITS
;
1718 predict_plane(s
, s
->spatial_idwt_buffer
, plane_index
, 0);
1721 && pic
->pict_type
== AV_PICTURE_TYPE_P
1722 && !(avctx
->flags
&CODEC_FLAG_PASS2
)
1723 && s
->m
.me
.scene_change_score
> s
->avctx
->scenechange_threshold
){
1724 ff_init_range_encoder(c
, pkt
->data
, pkt
->size
);
1725 ff_build_rac_states(c
, 0.05*(1LL<<32), 256-8);
1726 pic
->pict_type
= AV_PICTURE_TYPE_I
;
1728 s
->current_picture
->key_frame
=1;
1732 if(s
->qlog
== LOSSLESS_QLOG
){
1735 s
->spatial_dwt_buffer
[y
*w
+ x
]= (s
->spatial_idwt_buffer
[y
*w
+ x
] + (1<<(FRAC_BITS
-1))-1)>>FRAC_BITS
;
1741 s
->spatial_dwt_buffer
[y
*w
+ x
]=s
->spatial_idwt_buffer
[y
*w
+ x
]<<ENCODER_EXTRA_BITS
;
1746 ff_spatial_dwt(s
->spatial_dwt_buffer
, s
->temp_dwt_buffer
, w
, h
, w
, s
->spatial_decomposition_type
, s
->spatial_decomposition_count
);
1748 if(s
->pass1_rc
&& plane_index
==0){
1749 int delta_qlog
= ratecontrol_1pass(s
, pic
);
1750 if (delta_qlog
<= INT_MIN
)
1753 //reordering qlog in the bitstream would eliminate this reset
1754 ff_init_range_encoder(c
, pkt
->data
, pkt
->size
);
1755 memcpy(s
->header_state
, rc_header_bak
, sizeof(s
->header_state
));
1756 memcpy(s
->block_state
, rc_block_bak
, sizeof(s
->block_state
));
1758 encode_blocks(s
, 0);
/* Per subband: quantize, decorrelate, entropy-code, then undo the decorrelation. */
1762 for(level
=0; level
<s
->spatial_decomposition_count
; level
++){
1763 for(orientation
=level
? 1 : 0; orientation
<4; orientation
++){
1764 SubBand
*b
= &p
->band
[level
][orientation
];
1766 quantize(s
, b
, b
->ibuf
, b
->buf
, b
->stride
, s
->qbias
);
1768 decorrelate(s
, b
, b
->ibuf
, b
->stride
, pic
->pict_type
== AV_PICTURE_TYPE_P
, 0);
1769 if (!s
->no_bitstream
)
1770 encode_subband(s
, b
, b
->ibuf
, b
->parent
? b
->parent
->ibuf
: NULL
, b
->stride
, orientation
);
1771 av_assert0(b
->parent
==NULL
|| b
->parent
->stride
== b
->stride
*2);
1773 correlate(s
, b
, b
->ibuf
, b
->stride
, 1, 0);
1777 for(level
=0; level
<s
->spatial_decomposition_count
; level
++){
1778 for(orientation
=level
? 1 : 0; orientation
<4; orientation
++){
1779 SubBand
*b
= &p
->band
[level
][orientation
];
1781 dequantize(s
, b
, b
->ibuf
, b
->stride
);
1785 ff_spatial_idwt(s
->spatial_idwt_buffer
, s
->temp_idwt_buffer
, w
, h
, w
, s
->spatial_decomposition_type
, s
->spatial_decomposition_count
);
1786 if(s
->qlog
== LOSSLESS_QLOG
){
1789 s
->spatial_idwt_buffer
[y
*w
+ x
]<<=FRAC_BITS
;
1793 predict_plane(s
, s
->spatial_idwt_buffer
, plane_index
, 1);
1796 if(pic
->pict_type
== AV_PICTURE_TYPE_I
){
1799 s
->current_picture
->data
[plane_index
][y
*s
->current_picture
->linesize
[plane_index
] + x
]=
1800 pict
->data
[plane_index
][y
*pict
->linesize
[plane_index
] + x
];
1804 memset(s
->spatial_idwt_buffer
, 0, sizeof(IDWTELEM
)*w
*h
);
1805 predict_plane(s
, s
->spatial_idwt_buffer
, plane_index
, 1);
1808 if(s
->avctx
->flags
&CODEC_FLAG_PSNR
){
1811 if(pict
->data
[plane_index
]) //FIXME gray hack
1814 int d
= s
->current_picture
->data
[plane_index
][y
*s
->current_picture
->linesize
[plane_index
] + x
] - pict
->data
[plane_index
][y
*pict
->linesize
[plane_index
] + x
];
1818 s
->avctx
->error
[plane_index
] += error
;
1819 s
->current_picture
->error
[plane_index
] = error
;
1824 update_last_header_values(s
);
1826 ff_snow_release_buffer(avctx
);
/* Frame bookkeeping: picture numbers, quality and bit statistics. */
1828 s
->current_picture
->coded_picture_number
= avctx
->frame_number
;
1829 s
->current_picture
->pict_type
= pict
->pict_type
;
1830 s
->current_picture
->quality
= pict
->quality
;
1831 s
->m
.frame_bits
= 8*(s
->c
.bytestream
- s
->c
.bytestream_start
);
1832 s
->m
.p_tex_bits
= s
->m
.frame_bits
- s
->m
.misc_bits
- s
->m
.mv_bits
;
1833 s
->m
.current_picture
.f
->display_picture_number
=
1834 s
->m
.current_picture
.f
->coded_picture_number
= avctx
->frame_number
;
1835 s
->m
.current_picture
.f
->quality
= pic
->quality
;
1836 s
->m
.total_bits
+= 8*(s
->c
.bytestream
- s
->c
.bytestream_start
);
1838 if (ff_rate_estimate_qscale(&s
->m
, 0) < 0)
1840 if(avctx
->flags
&CODEC_FLAG_PASS1
)
1841 ff_write_pass1_stats(&s
->m
);
1842 s
->m
.last_pict_type
= s
->m
.pict_type
;
1843 avctx
->frame_bits
= s
->m
.frame_bits
;
1844 avctx
->mv_bits
= s
->m
.mv_bits
;
1845 avctx
->misc_bits
= s
->m
.misc_bits
;
1846 avctx
->p_tex_bits
= s
->m
.p_tex_bits
;
/* Finish the range coder; its return value becomes the packet size. */
1850 pkt
->size
= ff_rac_terminate(c
);
1851 if (avctx
->coded_frame
->key_frame
)
1852 pkt
->flags
|= AV_PKT_FLAG_KEY
;
/*
 * encode_end: codec close callback. Tears down the shared Snow state
 * (ff_snow_common_end), the rate-control state of s->m, the input picture
 * frame, and avctx->stats_out.
 */
1858 static av_cold
int encode_end(AVCodecContext
*avctx
)
1860 SnowContext
*s
= avctx
->priv_data
;
1862 ff_snow_common_end(s
);
1863 ff_rate_control_uninit(&s
->m
);
1864 av_frame_free(&s
->input_picture
);
1865 av_free(avctx
->stats_out
);
/* OFFSET(x): byte offset of member x inside SnowContext, used to bind an
 * option entry to the field it sets. */
1870 #define OFFSET(x) offsetof(SnowContext, x)
/* VE: option flags marking an entry as a video + encoding parameter.
 * NOTE(review): the expansion is not parenthesised; this is harmless while VE
 * is only used as a standalone initializer element, as it is below. */
1871 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the Snow encoder.
 *
 * Each entry names an option, gives its help string, and binds it to the
 * SnowContext member of the same name via OFFSET(). Both entries are
 * AV_OPT_TYPE_INT with an .i64 default of 0; the trailing "0, 1" are
 * presumably the minimum/maximum bounds per the AVOption field order --
 * confirm against libavutil/opt.h.
 *
 * NOTE(review): no terminating entry is visible in this excerpt; confirm the
 * table ends with the sentinel that AVOption tables require.
 */
1872 static const AVOption options
[] = {
/* memc_only: stored in SnowContext.memc_only. */
1873 { "memc_only", "Only do ME/MC (I frames -> ref, P frame -> ME+MC).", OFFSET(memc_only
), AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, 1, VE
},
/* no_bitstream: stored in SnowContext.no_bitstream. */
1874 { "no_bitstream", "Skip final bitstream writeout.", OFFSET(no_bitstream
), AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, 1, VE
},
/*
 * AVClass describing the encoder's private context; ff_snow_encoder points at
 * it through .priv_class.
 *
 * NOTE(review): no .option member is visible in this excerpt; confirm the
 * class references the options table, otherwise the private options would
 * not be reachable through it.
 */
1878 static const AVClass snowenc_class
= {
/* Name string for this class. */
1879 .class_name
= "snow encoder",
/* Uses the stock libavutil item-name callback. */
1880 .item_name
= av_default_item_name
,
/* Records the libavutil version this class was compiled against. */
1882 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * Codec descriptor for the Snow video encoder.
 *
 * Ties together the codec identity (AVMEDIA_TYPE_VIDEO / AV_CODEC_ID_SNOW),
 * the size of the private context (SnowContext), the three entry points
 * (encode_init, encode_frame, encode_end), the accepted pixel formats and the
 * private AVClass.
 *
 * NOTE(review): the .name member and the end of the pix_fmts list are not
 * visible in this excerpt; confirm the short name is set and that the format
 * list is closed with its terminator.
 */
1885 AVCodec ff_snow_encoder
= {
/* Long name is wrapped in NULL_IF_CONFIG_SMALL(). */
1887 .long_name
= NULL_IF_CONFIG_SMALL("Snow"),
1888 .type
= AVMEDIA_TYPE_VIDEO
,
1889 .id
= AV_CODEC_ID_SNOW
,
/* Size of the private context, taken from sizeof(SnowContext). */
1890 .priv_data_size
= sizeof(SnowContext
),
/* Lifecycle callbacks: setup, per-frame encode, teardown. */
1891 .init
= encode_init
,
1892 .encode2
= encode_frame
,
1893 .close
= encode_end
,
/* Input pixel formats listed in a compound-literal array. */
1894 .pix_fmts
= (const enum AVPixelFormat
[]){
1895 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV410P
, AV_PIX_FMT_YUV444P
,
/* Private AVClass for this encoder (snowenc_class). */
1899 .priv_class
= &snowenc_class
,
1908 #include "libavutil/lfg.h"
1909 #include "libavutil/mathematics.h"
1914 int buffer
[2][width
*height
];
1918 s
.spatial_decomposition_count
=6;
1919 s
.spatial_decomposition_type
=1;
1921 s
.temp_dwt_buffer
= av_mallocz(width
* sizeof(DWTELEM
));
1922 s
.temp_idwt_buffer
= av_mallocz(width
* sizeof(IDWTELEM
));
1924 av_lfg_init(&prng
, 1);
1926 printf("testing 5/3 DWT\n");
1927 for(i
=0; i
<width
*height
; i
++)
1928 buffer
[0][i
] = buffer
[1][i
] = av_lfg_get(&prng
) % 54321 - 12345;
1930 ff_spatial_dwt(buffer
[0], s
.temp_dwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
1931 ff_spatial_idwt((IDWTELEM
*)buffer
[0], s
.temp_idwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
1933 for(i
=0; i
<width
*height
; i
++)
1934 if(buffer
[0][i
]!= buffer
[1][i
]) printf("fsck: %6d %12d %7d\n",i
, buffer
[0][i
], buffer
[1][i
]);
1936 printf("testing 9/7 DWT\n");
1937 s
.spatial_decomposition_type
=0;
1938 for(i
=0; i
<width
*height
; i
++)
1939 buffer
[0][i
] = buffer
[1][i
] = av_lfg_get(&prng
) % 54321 - 12345;
1941 ff_spatial_dwt(buffer
[0], s
.temp_dwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
1942 ff_spatial_idwt((IDWTELEM
*)buffer
[0], s
.temp_idwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
1944 for(i
=0; i
<width
*height
; i
++)
1945 if(FFABS(buffer
[0][i
] - buffer
[1][i
])>20) printf("fsck: %6d %12d %7d\n",i
, buffer
[0][i
], buffer
[1][i
]);
1948 int level
, orientation
, x
, y
;
1949 int64_t errors
[8][4];
1952 memset(errors
, 0, sizeof(errors
));
1953 s
.spatial_decomposition_count
=3;
1954 s
.spatial_decomposition_type
=0;
1955 for(level
=0; level
<s
.spatial_decomposition_count
; level
++){
1956 for(orientation
=level
? 1 : 0; orientation
<4; orientation
++){
1957 int w
= width
>> (s
.spatial_decomposition_count
-level
);
1958 int h
= height
>> (s
.spatial_decomposition_count
-level
);
1959 int stride
= width
<< (s
.spatial_decomposition_count
-level
);
1960 DWTELEM
*buf
= buffer
[0];
1963 if(orientation
&1) buf
+=w
;
1964 if(orientation
>1) buf
+=stride
>>1;
1966 memset(buffer
[0], 0, sizeof(int)*width
*height
);
1967 buf
[w
/2 + h
/2*stride
]= 256*256;
1968 ff_spatial_idwt((IDWTELEM
*)buffer
[0], s
.temp_idwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
1969 for(y
=0; y
<height
; y
++){
1970 for(x
=0; x
<width
; x
++){
1971 int64_t d
= buffer
[0][x
+ y
*width
];
1973 if(FFABS(width
/2-x
)<9 && FFABS(height
/2-y
)<9 && level
==2) printf("%8"PRId64
" ", d
);
1975 if(FFABS(height
/2-y
)<9 && level
==2) printf("\n");
1977 error
= (int)(sqrt(error
)+0.5);
1978 errors
[level
][orientation
]= error
;
1979 if(g
) g
=av_gcd(g
, error
);
1983 printf("static int const visual_weight[][4]={\n");
1984 for(level
=0; level
<s
.spatial_decomposition_count
; level
++){
1986 for(orientation
=0; orientation
<4; orientation
++){
1987 printf("%8"PRId64
",", errors
[level
][orientation
]/g
);
1994 int w
= width
>> (s
.spatial_decomposition_count
-level
);
1995 //int h= height >> (s.spatial_decomposition_count-level);
1996 int stride
= width
<< (s
.spatial_decomposition_count
-level
);
1997 DWTELEM
*buf
= buffer
[0];
2003 memset(buffer
[0], 0, sizeof(int)*width
*height
);
2004 for(y
=0; y
<height
; y
++){
2005 for(x
=0; x
<width
; x
++){
2006 int tab
[4]={0,2,3,1};
2007 buffer
[0][x
+width
*y
]= 256*256*tab
[(x
&1) + 2*(y
&1)];
2010 ff_spatial_dwt(buffer
[0], s
.temp_dwt_buffer
, width
, height
, width
, s
.spatial_decomposition_type
, s
.spatial_decomposition_count
);
2011 for(y
=0; y
<height
; y
++){
2012 for(x
=0; x
<width
; x
++){
2013 int64_t d
= buffer
[0][x
+ y
*width
];
2015 if(FFABS(width
/2-x
)<9 && FFABS(height
/2-y
)<9) printf("%8"PRId64
" ", d
);
2017 if(FFABS(height
/2-y
)<9) printf("\n");