Imported Debian version 2.5.0~trusty1.1
[deb_ffmpeg.git] / ffmpeg / libavcodec / cinepakenc.c
1 /*
2 * Cinepak encoder (c) 2011 Tomas Härdin
3 * http://titan.codemill.se/~tomhar/cinepakenc.patch
4 *
5 * Fixes and improvements, vintage decoders compatibility
6 * (c) 2013, 2014 Rl, Aetey Global Technologies AB
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 and/or sell copies of the Software, and to permit persons to whom the
13 Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 OTHER DEALINGS IN THE SOFTWARE.
25
26 * TODO:
27 * - optimize: color space conversion, ...
28 * - implement options to set the min/max number of strips?
29 * MAYBE:
30 * - "optimally" split the frame into several non-regular areas
31 * using a separate codebook pair for each area and approximating
32 * the area by several rectangular strips (generally not full width ones)
33 * (use quadtree splitting? a simple fixed-granularity grid?)
34 *
35 *
36 * version 2014-01-23 Rl
37 * - added option handling for flexibility
38 *
39 * version 2014-01-21 Rl
40 * - believe it or not, now we get even smaller files, with better quality
41 * (which means I missed an optimization earlier :)
42 *
43 * version 2014-01-20 Rl
44 * - made the encoder compatible with vintage decoders
45 * and added some yet unused code for possible future
46 * incremental codebook updates
47 * - fixed a small memory leak
48 *
49 * version 2013-04-28 Rl
50 * - bugfixed codebook optimization logic
51 *
52 * version 2013-02-14 Rl
53 * "Valentine's Day" version:
54 * - made strip division more robust
55 * - minimized bruteforcing the number of strips,
56 * (costs some R/D but speeds up compression a lot), the heuristic
57 * assumption is that score as a function of the number of strips has
58 * one wide minimum which moves slowly, of course not fully true
59 * - simplified codebook generation,
60 * the old code was meant for other optimizations than we actually do
61 * - optimized the codebook generation / error estimation for MODE_MC
62 *
63 * version 2013-02-12 Rl
64 * - separated codebook training sets, avoided the transfer of wasted bytes,
65 * which yields both better quality and smaller files
66 * - now using the correct colorspace (TODO: move conversion to libswscale)
67 *
68 * version 2013-02-08 Rl
69 * - fixes/optimization in multistrip encoding and codebook size choice,
70 * quality/bitrate is now better than that of the binary proprietary encoder
71 */
72
73 #include "libavutil/intreadwrite.h"
74 #include "avcodec.h"
75 #include "libavutil/lfg.h"
76 #include "elbg.h"
77 #include "internal.h"
78
79 #include "libavutil/avassert.h"
80 #include "libavutil/opt.h"
81
/* Sizes, in bytes, of the fixed headers emitted into the bitstream. */
#define CVID_HEADER_SIZE  10
#define STRIP_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 4

/* Macroblocks are 4x4 pixels. */
#define MB_SIZE 4
#define MB_AREA (MB_SIZE * MB_SIZE)

/* Six or four entries per vector, depending on the pixel format. */
#define VECTOR_MAX 6
/* Number of entries in a codebook. */
#define CODEBOOK_MAX 256

/*
 * Bounds on the number of strips per frame.
 *
 * Having fewer choices regarding the number of strips speeds up encoding
 * (obviously); having more strips speeds up encoding the frame (this is
 * less obvious).
 *
 * MAX_STRIPS limits the maximum quality you can reach when you want high
 * quality on high resolutions; MIN_STRIPS limits the minimum efficiently
 * encodable bit rate on low resolutions.
 *
 * The numbers are only used for brute force optimization for the first
 * frame; for the following frames they are adaptively readjusted.
 *
 * NOTE: the decoder in ffmpeg has its own arbitrary limitation on the
 * number of strips, currently 32.
 */
#define MAX_STRIPS 32
#define MIN_STRIPS 1
102
/*
 * Coding mode of a strip, as stored in strip_info.mode.
 */
typedef enum {
    MODE_V1_ONLY = 0, /* every MB is a single V1 index (one byte per MB) */
    MODE_V1_V4   = 1, /* each MB is V1 or V4 (9 or 33 bits per MB) */
    MODE_MC      = 2, /* each MB is skipped, V1 or V4 (1, 10 or 34 bits per MB) */

    MODE_COUNT   = 3, /* number of modes, used as a loop bound */
} CinepakMode;
110
/*
 * How a single macroblock is (or is to be) encoded.
 */
typedef enum {
    ENC_V1        = 0, /* one V1 codebook index for the whole MB */
    ENC_V4        = 1, /* four V4 codebook indices, one per 2x2 quadrant */
    ENC_SKIP      = 2, /* MB copied from the previous frame */

    ENC_UNCERTAIN = 3  /* no encoding decided yet */
} mb_encoding;
118
119 typedef struct {
120 int v1_vector; //index into v1 codebook
121 int v1_error; //error when using V1 encoding
122 int v4_vector[4]; //indices into v4 codebooks
123 int v4_error; //error when using V4 encoding
124 int skip_error; //error when block is skipped (aka copied from last frame)
125 mb_encoding best_encoding; //last result from calculate_mode_score()
126 } mb_info;
127
128 typedef struct {
129 int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
130 int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
131 int v1_size;
132 int v4_size;
133 CinepakMode mode;
134 } strip_info;
135
136 typedef struct {
137 const AVClass *class;
138 AVCodecContext *avctx;
139 unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
140 AVFrame *last_frame;
141 AVFrame *best_frame;
142 AVFrame *scratch_frame;
143 AVFrame *input_frame;
144 enum AVPixelFormat pix_fmt;
145 int w, h;
146 int frame_buf_size;
147 int curframe, keyint;
148 AVLFG randctx;
149 uint64_t lambda;
150 int *codebook_input;
151 int *codebook_closest;
152 mb_info *mb; //MB RD state
153 int min_strips; //the current limit
154 int max_strips; //the current limit
155 #ifdef CINEPAKENC_DEBUG
156 mb_info *best_mb; //TODO: remove. only used for printing stats
157 int num_v1_mode, num_v4_mode, num_mc_mode;
158 int num_v1_encs, num_v4_encs, num_skips;
159 #endif
160 // options
161 int max_extra_cb_iterations;
162 int skip_empty_cb;
163 int min_min_strips;
164 int max_max_strips;
165 int strip_number_delta_range;
166 } CinepakEncContext;
167
168 #define OFFSET(x) offsetof(CinepakEncContext, x)
169 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
170 static const AVOption options[] = {
171 { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
172 { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
173 { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
174 { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
175 { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
176 { NULL },
177 };
178
179 static const AVClass cinepak_class = {
180 .class_name = "cinepak",
181 .item_name = av_default_item_name,
182 .option = options,
183 .version = LIBAVUTIL_VERSION_INT,
184 };
185
186 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
187 {
188 CinepakEncContext *s = avctx->priv_data;
189 int x, mb_count, strip_buf_size, frame_buf_size;
190
191 if (avctx->width & 3 || avctx->height & 3) {
192 av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
193 avctx->width, avctx->height);
194 return AVERROR(EINVAL);
195 }
196
197 if (s->min_min_strips > s->max_max_strips) {
198 av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
199 s->min_min_strips, s->max_max_strips);
200 return AVERROR(EINVAL);
201 }
202
203 if (!(s->last_frame = av_frame_alloc()))
204 return AVERROR(ENOMEM);
205 if (!(s->best_frame = av_frame_alloc()))
206 goto enomem;
207 if (!(s->scratch_frame = av_frame_alloc()))
208 goto enomem;
209 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
210 if (!(s->input_frame = av_frame_alloc()))
211 goto enomem;
212
213 if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
214 goto enomem;
215
216 if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
217 goto enomem;
218
219 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
220 if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
221 goto enomem;
222
223 mb_count = avctx->width * avctx->height / MB_AREA;
224
225 //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
226 //and full codebooks being replaced in INTER mode,
227 // which is 34 bits per MB
228 //and 2*256 extra flag bits per strip
229 strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
230
231 frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
232
233 if (!(s->strip_buf = av_malloc(strip_buf_size)))
234 goto enomem;
235
236 if (!(s->frame_buf = av_malloc(frame_buf_size)))
237 goto enomem;
238
239 if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
240 goto enomem;
241
242 #ifdef CINEPAKENC_DEBUG
243 if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
244 goto enomem;
245 #endif
246
247 av_lfg_init(&s->randctx, 1);
248 s->avctx = avctx;
249 s->w = avctx->width;
250 s->h = avctx->height;
251 s->frame_buf_size = frame_buf_size;
252 s->curframe = 0;
253 s->keyint = avctx->keyint_min;
254 s->pix_fmt = avctx->pix_fmt;
255
256 //set up AVFrames
257 s->last_frame->data[0] = s->pict_bufs[0];
258 s->last_frame->linesize[0] = s->w;
259 s->best_frame->data[0] = s->pict_bufs[1];
260 s->best_frame->linesize[0] = s->w;
261 s->scratch_frame->data[0] = s->pict_bufs[2];
262 s->scratch_frame->linesize[0] = s->w;
263
264 if (s->pix_fmt == AV_PIX_FMT_RGB24) {
265 s->last_frame->data[1] = s->last_frame->data[0] + s->w * s->h;
266 s->last_frame->data[2] = s->last_frame->data[1] + ((s->w * s->h) >> 2);
267 s->last_frame->linesize[1] = s->last_frame->linesize[2] = s->w >> 1;
268
269 s->best_frame->data[1] = s->best_frame->data[0] + s->w * s->h;
270 s->best_frame->data[2] = s->best_frame->data[1] + ((s->w * s->h) >> 2);
271 s->best_frame->linesize[1] = s->best_frame->linesize[2] = s->w >> 1;
272
273 s->scratch_frame->data[1] = s->scratch_frame->data[0] + s->w * s->h;
274 s->scratch_frame->data[2] = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
275 s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
276
277 s->input_frame->data[0] = s->pict_bufs[3];
278 s->input_frame->linesize[0] = s->w;
279 s->input_frame->data[1] = s->input_frame->data[0] + s->w * s->h;
280 s->input_frame->data[2] = s->input_frame->data[1] + ((s->w * s->h) >> 2);
281 s->input_frame->linesize[1] = s->input_frame->linesize[2] = s->w >> 1;
282 }
283
284 s->min_strips = s->min_min_strips;
285 s->max_strips = s->max_max_strips;
286
287 #ifdef CINEPAKENC_DEBUG
288 s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
289 #endif
290
291 return 0;
292
293 enomem:
294 av_frame_free(&s->last_frame);
295 av_frame_free(&s->best_frame);
296 av_frame_free(&s->scratch_frame);
297 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
298 av_frame_free(&s->input_frame);
299 av_freep(&s->codebook_input);
300 av_freep(&s->codebook_closest);
301 av_freep(&s->strip_buf);
302 av_freep(&s->frame_buf);
303 av_freep(&s->mb);
304 #ifdef CINEPAKENC_DEBUG
305 av_freep(&s->best_mb);
306 #endif
307
308 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
309 av_freep(&s->pict_bufs[x]);
310
311 return AVERROR(ENOMEM);
312 }
313
314 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
315 #ifdef CINEPAK_REPORT_SERR
316 , int64_t *serr
317 #endif
318 )
319 {
320 //score = FF_LAMBDA_SCALE * error + lambda * bits
321 int x;
322 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
323 int mb_count = s->w * h / MB_AREA;
324 mb_info *mb;
325 int64_t score1, score2, score3;
326 int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
327 (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
328 CHUNK_HEADER_SIZE) << 3;
329
330 //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
331
332 #ifdef CINEPAK_REPORT_SERR
333 *serr = 0;
334 #endif
335
336 switch(info->mode) {
337 case MODE_V1_ONLY:
338 //one byte per MB
339 ret += s->lambda * 8 * mb_count;
340
341 // while calculating we assume all blocks are ENC_V1
342 for(x = 0; x < mb_count; x++) {
343 mb = &s->mb[x];
344 ret += FF_LAMBDA_SCALE * mb->v1_error;
345 #ifdef CINEPAK_REPORT_SERR
346 *serr += mb->v1_error;
347 #endif
348 // this function is never called for report in MODE_V1_ONLY
349 // if(!report)
350 mb->best_encoding = ENC_V1;
351 }
352
353 break;
354 case MODE_V1_V4:
355 //9 or 33 bits per MB
356 if(report) {
357 // no moves between the corresponding training sets are allowed
358 *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
359 for(x = 0; x < mb_count; x++) {
360 int mberr;
361 mb = &s->mb[x];
362 if(mb->best_encoding == ENC_V1)
363 score1 = s->lambda * 9 + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
364 else
365 score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
366 ret += score1;
367 #ifdef CINEPAK_REPORT_SERR
368 *serr += mberr;
369 #endif
370 }
371 } else { // find best mode per block
372 for(x = 0; x < mb_count; x++) {
373 mb = &s->mb[x];
374 score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error;
375 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
376
377 if(score1 <= score2) {
378 ret += score1;
379 #ifdef CINEPAK_REPORT_SERR
380 *serr += mb->v1_error;
381 #endif
382 mb->best_encoding = ENC_V1;
383 } else {
384 ret += score2;
385 #ifdef CINEPAK_REPORT_SERR
386 *serr += mb->v4_error;
387 #endif
388 mb->best_encoding = ENC_V4;
389 }
390 }
391 }
392
393 break;
394 case MODE_MC:
395 //1, 10 or 34 bits per MB
396 if(report) {
397 int v1_shrunk = 0, v4_shrunk = 0;
398 for(x = 0; x < mb_count; x++) {
399 mb = &s->mb[x];
400 // it is OK to move blocks to ENC_SKIP here
401 // but not to any codebook encoding!
402 score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
403 if(mb->best_encoding == ENC_SKIP) {
404 ret += score1;
405 #ifdef CINEPAK_REPORT_SERR
406 *serr += mb->skip_error;
407 #endif
408 } else if(mb->best_encoding == ENC_V1) {
409 if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
410 mb->best_encoding = ENC_SKIP;
411 ++v1_shrunk;
412 ret += score1;
413 #ifdef CINEPAK_REPORT_SERR
414 *serr += mb->skip_error;
415 #endif
416 } else {
417 ret += score2;
418 #ifdef CINEPAK_REPORT_SERR
419 *serr += mb->v1_error;
420 #endif
421 }
422 } else {
423 if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
424 mb->best_encoding = ENC_SKIP;
425 ++v4_shrunk;
426 ret += score1;
427 #ifdef CINEPAK_REPORT_SERR
428 *serr += mb->skip_error;
429 #endif
430 } else {
431 ret += score3;
432 #ifdef CINEPAK_REPORT_SERR
433 *serr += mb->v4_error;
434 #endif
435 }
436 }
437 }
438 *training_set_v1_shrunk = v1_shrunk;
439 *training_set_v4_shrunk = v4_shrunk;
440 } else { // find best mode per block
441 for(x = 0; x < mb_count; x++) {
442 mb = &s->mb[x];
443 score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
444 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
445 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
446
447 if(score1 <= score2 && score1 <= score3) {
448 ret += score1;
449 #ifdef CINEPAK_REPORT_SERR
450 *serr += mb->skip_error;
451 #endif
452 mb->best_encoding = ENC_SKIP;
453 } else if(score2 <= score3) {
454 ret += score2;
455 #ifdef CINEPAK_REPORT_SERR
456 *serr += mb->v1_error;
457 #endif
458 mb->best_encoding = ENC_V1;
459 } else {
460 ret += score3;
461 #ifdef CINEPAK_REPORT_SERR
462 *serr += mb->v4_error;
463 #endif
464 mb->best_encoding = ENC_V4;
465 }
466 }
467 }
468
469 break;
470 }
471
472 return ret;
473 }
474
475 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
476 {
477 buf[0] = chunk_type;
478 AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
479 return CHUNK_HEADER_SIZE;
480 }
481
482 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
483 {
484 int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
485 int incremental_codebook_replacement_mode = 0; // hardcoded here,
486 // the compiler should notice that this is a constant -- rl
487
488 ret = write_chunk_header(buf,
489 s->pix_fmt == AV_PIX_FMT_RGB24 ?
490 chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
491 chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
492 entry_size * size
493 + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
494
495 // we do codebook encoding according to the "intra" mode
496 // but we keep the "dead" code for reference in case we will want
497 // to use incremental codebook updates (which actually would give us
498 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
499 // (of course, the code will be not useful as-is)
500 if(incremental_codebook_replacement_mode) {
501 int flags = 0;
502 int flagsind;
503 for(x = 0; x < size; x++) {
504 if(flags == 0) {
505 flagsind = ret;
506 ret += 4;
507 flags = 0x80000000;
508 } else
509 flags = ((flags>>1) | 0x80000000);
510 for(y = 0; y < entry_size; y++)
511 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
512 if((flags&0xffffffff) == 0xffffffff) {
513 AV_WB32(&buf[flagsind], flags);
514 flags = 0;
515 }
516 }
517 if(flags)
518 AV_WB32(&buf[flagsind], flags);
519 } else
520 for(x = 0; x < size; x++)
521 for(y = 0; y < entry_size; y++)
522 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
523
524 return ret;
525 }
526
527 //sets out to the sub picture starting at (x,y) in in
528 static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
529 {
530 out->data[0] = in->data[0] + x + y * in->linesize[0];
531 out->linesize[0] = in->linesize[0];
532
533 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
534 out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
535 out->linesize[1] = in->linesize[1];
536
537 out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
538 out->linesize[2] = in->linesize[2];
539 }
540 }
541
542 //decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
543 static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
544 {
545 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546
547 sub_pict->data[0][0] =
548 sub_pict->data[0][1] =
549 sub_pict->data[0][ sub_pict->linesize[0]] =
550 sub_pict->data[0][1+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551
552 sub_pict->data[0][2] =
553 sub_pict->data[0][3] =
554 sub_pict->data[0][2+ sub_pict->linesize[0]] =
555 sub_pict->data[0][3+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556
557 sub_pict->data[0][2*sub_pict->linesize[0]] =
558 sub_pict->data[0][1+2*sub_pict->linesize[0]] =
559 sub_pict->data[0][ 3*sub_pict->linesize[0]] =
560 sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561
562 sub_pict->data[0][2+2*sub_pict->linesize[0]] =
563 sub_pict->data[0][3+2*sub_pict->linesize[0]] =
564 sub_pict->data[0][2+3*sub_pict->linesize[0]] =
565 sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566
567 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568 sub_pict->data[1][0] =
569 sub_pict->data[1][1] =
570 sub_pict->data[1][ sub_pict->linesize[1]] =
571 sub_pict->data[1][1+ sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572
573 sub_pict->data[2][0] =
574 sub_pict->data[2][1] =
575 sub_pict->data[2][ sub_pict->linesize[2]] =
576 sub_pict->data[2][1+ sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577 }
578 }
579
580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
581 static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
582 {
583 int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
584
585 for(i = y = 0; y < 4; y += 2) {
586 for(x = 0; x < 4; x += 2, i++) {
587 sub_pict->data[0][x + y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
588 sub_pict->data[0][x+1 + y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
589 sub_pict->data[0][x + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
590 sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
591
592 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
593 sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
594 sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
595 }
596 }
597 }
598 }
599
600 static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
601 {
602 int y, p;
603
604 for(y = 0; y < MB_SIZE; y++) {
605 memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
606 MB_SIZE);
607 }
608
609 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
610 for(p = 1; p <= 2; p++) {
611 for(y = 0; y < MB_SIZE/2; y++) {
612 memcpy(a->data[p] + y*a->linesize[p],
613 b->data[p] + y*b->linesize[p],
614 MB_SIZE/2);
615 }
616 }
617 }
618 }
619
620 static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
621 {
622 int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
623 int needs_extra_bit, should_write_temp;
624 unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
625 mb_info *mb;
626 AVPicture sub_scratch = {{0}}, sub_last = {{0}};
627
628 //encode codebooks
629 ////// MacOS vintage decoder compatibility dictates the presence of
630 ////// the codebook chunk even when the codebook is empty - pretty dumb...
631 ////// and also the certain order of the codebook chunks -- rl
632 if(info->v4_size || !s->skip_empty_cb)
633 ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
634
635 if(info->v1_size || !s->skip_empty_cb)
636 ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
637
638 //update scratch picture
639 for(z = y = 0; y < h; y += MB_SIZE) {
640 for(x = 0; x < s->w; x += MB_SIZE, z++) {
641 mb = &s->mb[z];
642
643 get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
644
645 if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
646 get_sub_picture(s, x, y, last_pict, &sub_last);
647 copy_mb(s, &sub_scratch, &sub_last);
648 } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
649 decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
650 else
651 decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
652 }
653 }
654
655 switch(info->mode) {
656 case MODE_V1_ONLY:
657 //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
658 ret += write_chunk_header(buf + ret, 0x32, mb_count);
659
660 for(x = 0; x < mb_count; x++)
661 buf[ret++] = s->mb[x].v1_vector;
662
663 break;
664 case MODE_V1_V4:
665 //remember header position
666 header_ofs = ret;
667 ret += CHUNK_HEADER_SIZE;
668
669 for(x = 0; x < mb_count; x += 32) {
670 flags = 0;
671 for(y = x; y < FFMIN(x+32, mb_count); y++)
672 if(s->mb[y].best_encoding == ENC_V4)
673 flags |= 1 << (31 - y + x);
674
675 AV_WB32(&buf[ret], flags);
676 ret += 4;
677
678 for(y = x; y < FFMIN(x+32, mb_count); y++) {
679 mb = &s->mb[y];
680
681 if(mb->best_encoding == ENC_V1)
682 buf[ret++] = mb->v1_vector;
683 else
684 for(z = 0; z < 4; z++)
685 buf[ret++] = mb->v4_vector[z];
686 }
687 }
688
689 write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
690
691 break;
692 case MODE_MC:
693 //remember header position
694 header_ofs = ret;
695 ret += CHUNK_HEADER_SIZE;
696 flags = bits = temp_size = 0;
697
698 for(x = 0; x < mb_count; x++) {
699 mb = &s->mb[x];
700 flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
701 needs_extra_bit = 0;
702 should_write_temp = 0;
703
704 if(mb->best_encoding != ENC_SKIP) {
705 if(bits < 32)
706 flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
707 else
708 needs_extra_bit = 1;
709 }
710
711 if(bits == 32) {
712 AV_WB32(&buf[ret], flags);
713 ret += 4;
714 flags = bits = 0;
715
716 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
717 memcpy(&buf[ret], temp, temp_size);
718 ret += temp_size;
719 temp_size = 0;
720 } else
721 should_write_temp = 1;
722 }
723
724 if(needs_extra_bit) {
725 flags = (mb->best_encoding == ENC_V4) << 31;
726 bits = 1;
727 }
728
729 if(mb->best_encoding == ENC_V1)
730 temp[temp_size++] = mb->v1_vector;
731 else if(mb->best_encoding == ENC_V4)
732 for(z = 0; z < 4; z++)
733 temp[temp_size++] = mb->v4_vector[z];
734
735 if(should_write_temp) {
736 memcpy(&buf[ret], temp, temp_size);
737 ret += temp_size;
738 temp_size = 0;
739 }
740 }
741
742 if(bits > 0) {
743 AV_WB32(&buf[ret], flags);
744 ret += 4;
745 memcpy(&buf[ret], temp, temp_size);
746 ret += temp_size;
747 }
748
749 write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
750
751 break;
752 }
753
754 return ret;
755 }
756
757 //computes distortion of 4x4 MB in b compared to a
758 static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
759 {
760 int x, y, p, d, ret = 0;
761
762 for(y = 0; y < MB_SIZE; y++) {
763 for(x = 0; x < MB_SIZE; x++) {
764 d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
765 ret += d*d;
766 }
767 }
768
769 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
770 for(p = 1; p <= 2; p++) {
771 for(y = 0; y < MB_SIZE/2; y++) {
772 for(x = 0; x < MB_SIZE/2; x++) {
773 d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
774 ret += d*d;
775 }
776 }
777 }
778 }
779
780 return ret;
781 }
782
783 // return the possibly adjusted size of the codebook
784 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
785 static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
786 int v1mode, strip_info *info,
787 mb_encoding encoding)
788 {
789 int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
790 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
791 int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
792 int size = v1mode ? info->v1_size : info->v4_size;
793 int64_t total_error = 0;
794 uint8_t vq_pict_buf[(MB_AREA*3)/2];
795 AVPicture sub_pict, vq_pict;
796
797 for(mbn = i = y = 0; y < h; y += MB_SIZE) {
798 for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
799 int *base;
800
801 if(CERTAIN(encoding)) {
802 // use for the training only the blocks known to be to be encoded [sic:-]
803 if(s->mb[mbn].best_encoding != encoding) continue;
804 }
805
806 base = s->codebook_input + i*entry_size;
807 if(v1mode) {
808 //subsample
809 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
810 for(x2 = 0; x2 < 4; x2 += 2, j++) {
811 plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
812 shift = y2 < 4 ? 0 : 1;
813 x3 = shift ? 0 : x2;
814 y3 = shift ? 0 : y2;
815 base[j] = (pict->data[plane][((x+x3) >> shift) + ((y+y3) >> shift) * pict->linesize[plane]] +
816 pict->data[plane][((x+x3) >> shift) + 1 + ((y+y3) >> shift) * pict->linesize[plane]] +
817 pict->data[plane][((x+x3) >> shift) + (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
818 pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
819 }
820 }
821 } else {
822 //copy
823 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
824 for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
825 for(k = 0; k < entry_size; k++, j++) {
826 plane = k >= 4 ? k - 3 : 0;
827
828 if(k >= 4) {
829 x3 = (x+x2) >> 1;
830 y3 = (y+y2) >> 1;
831 } else {
832 x3 = x + x2 + (k & 1);
833 y3 = y + y2 + (k >> 1);
834 }
835
836 base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
837 }
838 }
839 }
840 }
841 i += v1mode ? 1 : 4;
842 }
843 }
844 // if(i < mbn*(v1mode ? 1 : 4)) {
845 // av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
846 // }
847
848 if(i == 0) // empty training set, nothing to do
849 return 0;
850 if(i < size) {
851 //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
852 size = i;
853 }
854
855 avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
856 avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
857
858 //setup vq_pict, which contains a single MB
859 vq_pict.data[0] = vq_pict_buf;
860 vq_pict.linesize[0] = MB_SIZE;
861 vq_pict.data[1] = &vq_pict_buf[MB_AREA];
862 vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
863 vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
864
865 //copy indices
866 for(i = j = y = 0; y < h; y += MB_SIZE) {
867 for(x = 0; x < s->w; x += MB_SIZE, j++) {
868 mb_info *mb = &s->mb[j];
869 // skip uninteresting blocks if we know their preferred encoding
870 if(CERTAIN(encoding) && mb->best_encoding != encoding)
871 continue;
872
873 //point sub_pict to current MB
874 get_sub_picture(s, x, y, pict, &sub_pict);
875
876 if(v1mode) {
877 mb->v1_vector = s->codebook_closest[i];
878
879 //fill in vq_pict with V1 data
880 decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
881
882 mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
883 total_error += mb->v1_error;
884 } else {
885 for(k = 0; k < 4; k++)
886 mb->v4_vector[k] = s->codebook_closest[i+k];
887
888 //fill in vq_pict with V4 data
889 decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
890
891 mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
892 total_error += mb->v4_error;
893 }
894 i += v1mode ? 1 : 4;
895 }
896 }
897 // check that we did it right in the beginning of the function
898 av_assert0(i >= size); // training set is no smaller than the codebook
899
900 //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
901
902 return size;
903 }
904
905 static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
906 {
907 int x, y, i;
908 AVPicture sub_last, sub_pict;
909
910 for(i = y = 0; y < h; y += MB_SIZE) {
911 for(x = 0; x < s->w; x += MB_SIZE, i++) {
912 get_sub_picture(s, x, y, last_pict, &sub_last);
913 get_sub_picture(s, x, y, pict, &sub_pict);
914
915 s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
916 }
917 }
918 }
919
920 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
921 {
922 // actually we are exclusively using intra strip coding (how much can we win
923 // otherwise? how to choose which part of a codebook to update?),
924 // keyframes are different only because we disallow ENC_SKIP on them -- rl
925 // (besides, the logic here used to be inverted: )
926 // buf[0] = keyframe ? 0x11: 0x10;
927 buf[0] = keyframe ? 0x10: 0x11;
928 AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
929 // AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
930 AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
931 AV_WB16(&buf[6], 0);
932 // AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
933 AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
934 AV_WB16(&buf[10], s->w);
935 //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
936 }
937
938 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
939 #ifdef CINEPAK_REPORT_SERR
940 , int64_t *best_serr
941 #endif
942 )
943 {
944 int64_t score = 0;
945 #ifdef CINEPAK_REPORT_SERR
946 int64_t serr;
947 #endif
948 int best_size = 0;
949 strip_info info;
950 // for codebook optimization:
951 int v1enough, v1_size, v4enough, v4_size;
952 int new_v1_size, new_v4_size;
953 int v1shrunk, v4shrunk;
954
955 if(!keyframe)
956 calculate_skip_errors(s, h, last_pict, pict, &info);
957
958 //try some powers of 4 for the size of the codebooks
959 //constraint the v4 codebook to be no bigger than v1 one,
960 //(and no less than v1_size/4)
961 //thus making v1 preferable and possibly losing small details? should be ok
962 #define SMALLEST_CODEBOOK 1
963 for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
964 for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
965 //try all modes
966 for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
967 //don't allow MODE_MC in intra frames
968 if(keyframe && mode == MODE_MC)
969 continue;
970
971 if(mode == MODE_V1_ONLY) {
972 info.v1_size = v1_size;
973 // the size may shrink even before optimizations if the input is short:
974 info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
975 if(info.v1_size < v1_size)
976 // too few eligible blocks, no sense in trying bigger sizes
977 v1enough = 1;
978
979 info.v4_size = 0;
980 } else { // mode != MODE_V1_ONLY
981 // if v4 codebook is empty then only allow V1-only mode
982 if(!v4_size)
983 continue;
984
985 if(mode == MODE_V1_V4) {
986 info.v4_size = v4_size;
987 info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
988 if(info.v4_size < v4_size)
989 // too few eligible blocks, no sense in trying bigger sizes
990 v4enough = 1;
991 }
992 }
993
994 info.mode = mode;
995 // choose the best encoding per block, based on current experience
996 score = calculate_mode_score(s, h, &info, 0,
997 &v1shrunk, &v4shrunk
998 #ifdef CINEPAK_REPORT_SERR
999 , &serr
1000 #endif
1001 );
1002
1003 if(mode != MODE_V1_ONLY){
1004 int extra_iterations_limit = s->max_extra_cb_iterations;
1005 // recompute the codebooks, omitting the extra blocks
1006 // we assume we _may_ come here with more blocks to encode than before
1007 info.v1_size = v1_size;
1008 new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1009 if(new_v1_size < info.v1_size){
1010 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1011 info.v1_size = new_v1_size;
1012 }
1013 // we assume we _may_ come here with more blocks to encode than before
1014 info.v4_size = v4_size;
1015 new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1016 if(new_v4_size < info.v4_size) {
1017 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1018 info.v4_size = new_v4_size;
1019 }
1020 // calculate the resulting score
1021 // (do not move blocks to codebook encodings now, as some blocks may have
1022 // got bigger errors despite a smaller training set - but we do not
1023 // ever grow the training sets back)
1024 for(;;) {
1025 score = calculate_mode_score(s, h, &info, 1,
1026 &v1shrunk, &v4shrunk
1027 #ifdef CINEPAK_REPORT_SERR
1028 , &serr
1029 #endif
1030 );
1031 // do we have a reason to reiterate? if so, have we reached the limit?
1032 if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1033 // recompute the codebooks, omitting the extra blocks
1034 if(v1shrunk) {
1035 info.v1_size = v1_size;
1036 new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1037 if(new_v1_size < info.v1_size){
1038 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1039 info.v1_size = new_v1_size;
1040 }
1041 }
1042 if(v4shrunk) {
1043 info.v4_size = v4_size;
1044 new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1045 if(new_v4_size < info.v4_size) {
1046 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1047 info.v4_size = new_v4_size;
1048 }
1049 }
1050 }
1051 }
1052
1053 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
1054
1055 if(best_size == 0 || score < *best_score) {
1056
1057 *best_score = score;
1058 #ifdef CINEPAK_REPORT_SERR
1059 *best_serr = serr;
1060 #endif
1061 best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
1062
1063 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
1064 //av_log(s->avctx, AV_LOG_INFO, "\n");
1065 #ifdef CINEPAK_REPORT_SERR
1066 av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
1067 #endif
1068
1069 #ifdef CINEPAKENC_DEBUG
1070 //save MB encoding choices
1071 memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1072 #endif
1073
1074 //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1075 write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1076
1077 }
1078 }
1079 }
1080 }
1081
1082 #ifdef CINEPAKENC_DEBUG
1083 //gather stats. this will only work properly of MAX_STRIPS == 1
1084 if(best_info.mode == MODE_V1_ONLY) {
1085 s->num_v1_mode++;
1086 s->num_v1_encs += s->w*h/MB_AREA;
1087 } else {
1088 if(best_info.mode == MODE_V1_V4)
1089 s->num_v4_mode++;
1090 else
1091 s->num_mc_mode++;
1092
1093 int x;
1094 for(x = 0; x < s->w*h/MB_AREA; x++)
1095 if(s->best_mb[x].best_encoding == ENC_V1)
1096 s->num_v1_encs++;
1097 else if(s->best_mb[x].best_encoding == ENC_V4)
1098 s->num_v4_encs++;
1099 else
1100 s->num_skips++;
1101 }
1102 #endif
1103
1104 best_size += STRIP_HEADER_SIZE;
1105 memcpy(buf, s->strip_buf, best_size);
1106
1107 return best_size;
1108 }
1109
1110 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1111 {
1112 buf[0] = isakeyframe ? 0 : 1;
1113 AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1114 AV_WB16(&buf[4], s->w);
1115 AV_WB16(&buf[6], s->h);
1116 AV_WB16(&buf[8], num_strips);
1117
1118 return CVID_HEADER_SIZE;
1119 }
1120
1121 static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
1122 {
1123 int num_strips, strip, i, y, nexty, size, temp_size;
1124 AVPicture last_pict, pict, scratch_pict;
1125 int64_t best_score = 0, score, score_temp;
1126 #ifdef CINEPAK_REPORT_SERR
1127 int64_t best_serr = 0, serr, serr_temp;
1128 #endif
1129
1130 int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1131
1132 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1133 int x;
1134 // build a copy of the given frame in the correct colorspace
1135 for(y = 0; y < s->h; y += 2) {
1136 for(x = 0; x < s->w; x += 2) {
1137 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1138 ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
1139 ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
1140 get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
1141 r = g = b = 0;
1142 for(i=0; i<4; ++i) {
1143 int i1, i2;
1144 i1 = (i&1); i2 = (i>=2);
1145 rr = ir[i2][i1*3+0];
1146 gg = ir[i2][i1*3+1];
1147 bb = ir[i2][i1*3+2];
1148 r += rr; g += gg; b += bb;
1149 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1150 // "Y"
1151 // rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1152 rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1153 if( rr < 0) rr = 0;
1154 else if (rr > 255) rr = 255;
1155 scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
1156 }
1157 // let us scale down as late as possible
1158 // r /= 4; g /= 4; b /= 4;
1159 // "U"
1160 // rr = -0.1429*r - 0.2857*g + 0.4286*b;
1161 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1162 if( rr < -128) rr = -128;
1163 else if (rr > 127) rr = 127;
1164 scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
1165 // "V"
1166 // rr = 0.3571*r - 0.2857*g - 0.0714*b;
1167 rr = (748893*r - 599156*g - 149737*b) >> 23;
1168 if( rr < -128) rr = -128;
1169 else if (rr > 127) rr = 127;
1170 scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
1171 }
1172 }
1173 }
1174
1175 //would be nice but quite certainly incompatible with vintage players:
1176 // support encoding zero strips (meaning skip the whole frame)
1177 for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1178 score = 0;
1179 size = 0;
1180 #ifdef CINEPAK_REPORT_SERR
1181 serr = 0;
1182 #endif
1183
1184 for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1185 int strip_height;
1186
1187 nexty = strip * s->h / num_strips; // <= s->h
1188 //make nexty the next multiple of 4 if not already there
1189 if(nexty & 3)
1190 nexty += 4 - (nexty & 3);
1191
1192 strip_height = nexty - y;
1193 if(strip_height <= 0) { // can this ever happen?
1194 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1195 continue;
1196 }
1197
1198 if(s->pix_fmt == AV_PIX_FMT_RGB24)
1199 get_sub_picture(s, 0, y, (AVPicture*)s->input_frame, &pict);
1200 else
1201 get_sub_picture(s, 0, y, (AVPicture*)frame, &pict);
1202 get_sub_picture(s, 0, y, (AVPicture*)s->last_frame, &last_pict);
1203 get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
1204
1205 if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1206 #ifdef CINEPAK_REPORT_SERR
1207 , &serr_temp
1208 #endif
1209 )) < 0)
1210 return temp_size;
1211
1212 score += score_temp;
1213 #ifdef CINEPAK_REPORT_SERR
1214 serr += serr_temp;
1215 #endif
1216 size += temp_size;
1217 //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1218 //av_log(s->avctx, AV_LOG_INFO, "\n");
1219 }
1220
1221 if(best_score == 0 || score < best_score) {
1222 best_score = score;
1223 #ifdef CINEPAK_REPORT_SERR
1224 best_serr = serr;
1225 #endif
1226 best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1227 //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
1228 #ifdef CINEPAK_REPORT_SERR
1229 av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
1230 #endif
1231
1232 FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1233 memcpy(buf, s->frame_buf, best_size);
1234 best_nstrips = num_strips;
1235 }
1236 // avoid trying too many strip numbers without a real reason
1237 // (this makes the processing of the very first frame faster)
1238 if(num_strips - best_nstrips > 4)
1239 break;
1240 }
1241
1242 av_assert0(best_nstrips >= 0 && best_size >= 0);
1243
1244 // let the number of strips slowly adapt to the changes in the contents,
1245 // compared to full bruteforcing every time this will occasionally lead
1246 // to some r/d performance loss but makes encoding up to several times faster
1247 if(!s->strip_number_delta_range) {
1248 if(best_nstrips == s->max_strips) { // let us try to step up
1249 s->max_strips = best_nstrips + 1;
1250 if(s->max_strips >= s->max_max_strips)
1251 s->max_strips = s->max_max_strips;
1252 } else { // try to step down
1253 s->max_strips = best_nstrips;
1254 }
1255 s->min_strips = s->max_strips - 1;
1256 if(s->min_strips < s->min_min_strips)
1257 s->min_strips = s->min_min_strips;
1258 } else {
1259 s->max_strips = best_nstrips + s->strip_number_delta_range;
1260 if(s->max_strips >= s->max_max_strips)
1261 s->max_strips = s->max_max_strips;
1262 s->min_strips = best_nstrips - s->strip_number_delta_range;
1263 if(s->min_strips < s->min_min_strips)
1264 s->min_strips = s->min_min_strips;
1265 }
1266
1267 return best_size;
1268 }
1269
1270 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1271 const AVFrame *frame, int *got_packet)
1272 {
1273 CinepakEncContext *s = avctx->priv_data;
1274 int ret;
1275
1276 s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1277
1278 if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
1279 return ret;
1280 ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1281 pkt->size = ret;
1282 if (s->curframe == 0)
1283 pkt->flags |= AV_PKT_FLAG_KEY;
1284 *got_packet = 1;
1285
1286 FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1287
1288 if (++s->curframe >= s->keyint)
1289 s->curframe = 0;
1290
1291 return 0;
1292 }
1293
1294 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1295 {
1296 CinepakEncContext *s = avctx->priv_data;
1297 int x;
1298
1299 av_frame_free(&s->last_frame);
1300 av_frame_free(&s->best_frame);
1301 av_frame_free(&s->scratch_frame);
1302 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1303 av_frame_free(&s->input_frame);
1304 av_freep(&s->codebook_input);
1305 av_freep(&s->codebook_closest);
1306 av_freep(&s->strip_buf);
1307 av_freep(&s->frame_buf);
1308 av_freep(&s->mb);
1309 #ifdef CINEPAKENC_DEBUG
1310 av_freep(&s->best_mb);
1311 #endif
1312
1313 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1314 av_freep(&s->pict_bufs[x]);
1315
1316 #ifdef CINEPAKENC_DEBUG
1317 av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1318 s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1319 #endif
1320
1321 return 0;
1322 }
1323
1324 AVCodec ff_cinepak_encoder = {
1325 .name = "cinepak",
1326 .type = AVMEDIA_TYPE_VIDEO,
1327 .id = AV_CODEC_ID_CINEPAK,
1328 .priv_data_size = sizeof(CinepakEncContext),
1329 .init = cinepak_encode_init,
1330 .encode2 = cinepak_encode_frame,
1331 .close = cinepak_encode_end,
1332 .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1333 .long_name = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1334 .priv_class = &cinepak_class,
1335 };