ffmpeg/libavcodec/cinepakenc.c

   1 /*
   2  * Cinepak encoder (c) 2011 Tomas Härdin
   3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
   4  *
   5  * Fixes and improvements, vintage decoders compatibility
   6  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a
   9 copy of this software and associated documentation files (the "Software"),
  10 to deal in the Software without restriction, including without limitation
  11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 and/or sell copies of the Software, and to permit persons to whom the
  13 Software is furnished to do so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice shall be included
  16 in all copies or substantial portions of the Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 OTHER DEALINGS IN THE SOFTWARE.
  25
  26  * TODO:
  27  * - optimize: color space conversion, ...
  28  * - implement options to set the min/max number of strips?
  29  * MAYBE:
  30  * - "optimally" split the frame into several non-regular areas
  31  *   using a separate codebook pair for each area and approximating
  32  *   the area by several rectangular strips (generally not full width ones)
  33  *   (use quadtree splitting? a simple fixed-granularity grid?)
  34  *
  35  *
  36  * version 2014-01-23 Rl
  37  * - added option handling for flexibility
  38  *
  39  * version 2014-01-21 Rl
  40  * - believe it or not, now we get even smaller files, with better quality
  41  *   (which means I missed an optimization earlier :)
  42  *
  43  * version 2014-01-20 Rl
  44  * - made the encoder compatible with vintage decoders
  45  *   and added some yet unused code for possible future
  46  *   incremental codebook updates
  47  * - fixed a small memory leak
  48  *
  49  * version 2013-04-28 Rl
  50  * - bugfixed codebook optimization logic
  51  *
  52  * version 2013-02-14 Rl
  53  * "Valentine's Day" version:
  54  * - made strip division more robust
  55  * - minimized bruteforcing the number of strips,
  56  *   (costs some R/D but speeds up compression a lot), the heuristic
  57  *   assumption is that score as a function of the number of strips has
  58  *   one wide minimum which moves slowly, of course not fully true
  59  * - simplified codebook generation,
  60  *   the old code was meant for other optimizations than we actually do
  61  * - optimized the codebook generation / error estimation for MODE_MC
  62  *
  63  * version 2013-02-12 Rl
  64  * - separated codebook training sets, avoided the transfer of wasted bytes,
  65  *   which yields both better quality and smaller files
  66  * - now using the correct colorspace (TODO: move conversion to libswscale)
  67  *
  68  * version 2013-02-08 Rl
  69  * - fixes/optimization in multistrip encoding and codebook size choice,
  70  *   quality/bitrate is now better than that of the binary proprietary encoder
  71  */
  72
  73 #include "libavutil/intreadwrite.h"
  74 #include "avcodec.h"
  75 #include "libavutil/lfg.h"
  76 #include "elbg.h"
  77 #include "internal.h"
  78
  79 #include "libavutil/avassert.h"
  80 #include "libavutil/opt.h"
  81
  82 #define CVID_HEADER_SIZE 10 //bytes reserved per frame ahead of the strips (see frame_buf_size)
  83 #define STRIP_HEADER_SIZE 12 //bytes reserved per strip ahead of its chunks (see strip_buf_size)
  84 #define CHUNK_HEADER_SIZE 4 //one type byte plus a 24-bit big-endian size, see write_chunk_header()
  85
  86 #define MB_SIZE 4           //4x4 MBs
  87 #define MB_AREA (MB_SIZE*MB_SIZE) //luma samples per MB; the MB count of an area is w*h/MB_AREA
  88
  89 #define VECTOR_MAX 6        //six or four entries per vector depending on format
  90 #define CODEBOOK_MAX 256    //size of a codebook
  91
  92 #define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
  93 #define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
  94 // MAX_STRIPS limits the maximum quality you can reach
  95 //            when you want high quality on high resolutions,
  96 // MIN_STRIPS limits the minimum efficiently encodable bit rate
  97 //            on low resolutions
  98 // the numbers are only used for brute force optimization for the first frame,
  99 // for the following frames they are adaptively readjusted
 100 // NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
 101 // of strips, currently 32
 102
 103 typedef enum {
 104     MODE_V1_ONLY = 0,
 105     MODE_V1_V4,
 106     MODE_MC,
 107
 108     MODE_COUNT,
 109 } CinepakMode;
 110
 111 typedef enum {
 112     ENC_V1,
 113     ENC_V4,
 114     ENC_SKIP,
 115
 116     ENC_UNCERTAIN
 117 } mb_encoding;
 118
 119 typedef struct {                    //rate/distortion bookkeeping for one 4x4 macroblock
 120     int v1_vector;                  //index into v1 codebook (emitted as one byte by encode_mode())
 121     int v1_error;                   //error when using V1 encoding
 122     int v4_vector[4];               //indices into v4 codebooks, one per 2x2 quadrant in raster order
 123     int v4_error;                   //error when using V4 encoding
 124     int skip_error;                 //error when block is skipped (aka copied from last frame)
 125     mb_encoding best_encoding;      //last result from calculate_mode_score()
 126 } mb_info;
 127
 128 typedef struct {                                //codebooks and coding mode of one strip
 129     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];   //V1 entries, stored entry_size (4 or 6) ints apart
 130     int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];   //V4 entries, same layout as v1_codebook
 131     int v1_size;                                //number of V1 entries; 0 means the codebook is empty
 132     int v4_size;                                //number of V4 entries; 0 means the codebook is empty
 133     CinepakMode mode;                           //selects the vector chunk layout written by encode_mode()
 134 } strip_info;
 135
 136 typedef struct {
 137     const AVClass *class;
 138     AVCodecContext *avctx;
 139     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
 140     AVFrame *last_frame;
 141     AVFrame *best_frame;
 142     AVFrame *scratch_frame;
 143     AVFrame *input_frame;
 144     enum AVPixelFormat pix_fmt;
 145     int w, h;
 146     int frame_buf_size;
 147     int curframe, keyint;
 148     AVLFG randctx;
 149     uint64_t lambda;
 150     int *codebook_input;
 151     int *codebook_closest;
 152     mb_info *mb;                                //MB RD state
 153     int min_strips;          //the current limit
 154     int max_strips;          //the current limit
 155 #ifdef CINEPAKENC_DEBUG
 156     mb_info *best_mb;                           //TODO: remove. only used for printing stats
 157     int num_v1_mode, num_v4_mode, num_mc_mode;
 158     int num_v1_encs, num_v4_encs, num_skips;
 159 #endif
 160 // options
 161     int max_extra_cb_iterations;
 162     int skip_empty_cb;
 163     int min_min_strips;
 164     int max_max_strips;
 165     int strip_number_delta_range;
 166 } CinepakEncContext;
 167
 168 #define OFFSET(x) offsetof(CinepakEncContext, x)
 169 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 170 static const AVOption options[] = {
 171     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
 172     { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
 173     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
 174     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
 175     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
 176     { NULL },
 177 };
 178
 179 static const AVClass cinepak_class = {      //AVClass that exposes the private options
 180     .class_name = "cinepak",
 181     .item_name  = av_default_item_name,
 182     .option     = options,                  //the AVOption table defined above
 183     .version    = LIBAVUTIL_VERSION_INT,
 184 };
 185
 186 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
 187 {
 188     CinepakEncContext *s = avctx->priv_data;
 189     int x, mb_count, strip_buf_size, frame_buf_size;
 190
 191     if (avctx->width & 3 || avctx->height & 3) {
 192         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
 193                 avctx->width, avctx->height);
 194         return AVERROR(EINVAL);
 195     }
 196
 197     if (s->min_min_strips > s->max_max_strips) {
 198         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
 199                 s->min_min_strips, s->max_max_strips);
 200         return AVERROR(EINVAL);
 201     }
 202
 203     if (!(s->last_frame = av_frame_alloc()))
 204         return AVERROR(ENOMEM);
 205     if (!(s->best_frame = av_frame_alloc()))
 206         goto enomem;
 207     if (!(s->scratch_frame = av_frame_alloc()))
 208         goto enomem;
 209     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 210         if (!(s->input_frame = av_frame_alloc()))
 211             goto enomem;
 212
 213     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 214         goto enomem;
 215
 216     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
 217         goto enomem;
 218
 219     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 220         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 221             goto enomem;
 222
 223     mb_count = avctx->width * avctx->height / MB_AREA;
 224
 225     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
 226     //and full codebooks being replaced in INTER mode,
 227     // which is 34 bits per MB
 228     //and 2*256 extra flag bits per strip
 229     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
 230
 231     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
 232
 233     if (!(s->strip_buf = av_malloc(strip_buf_size)))
 234         goto enomem;
 235
 236     if (!(s->frame_buf = av_malloc(frame_buf_size)))
 237         goto enomem;
 238
 239     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
 240         goto enomem;
 241
 242 #ifdef CINEPAKENC_DEBUG
 243     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
 244         goto enomem;
 245 #endif
 246
 247     av_lfg_init(&s->randctx, 1);
 248     s->avctx = avctx;
 249     s->w = avctx->width;
 250     s->h = avctx->height;
 251     s->frame_buf_size = frame_buf_size;
 252     s->curframe = 0;
 253     s->keyint = avctx->keyint_min;
 254     s->pix_fmt = avctx->pix_fmt;
 255
 256     //set up AVFrames
 257     s->last_frame->data[0]        = s->pict_bufs[0];
 258     s->last_frame->linesize[0]    = s->w;
 259     s->best_frame->data[0]        = s->pict_bufs[1];
 260     s->best_frame->linesize[0]    = s->w;
 261     s->scratch_frame->data[0]     = s->pict_bufs[2];
 262     s->scratch_frame->linesize[0] = s->w;
 263
 264     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
 265         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
 266         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
 267         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
 268
 269         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
 270         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
 271         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
 272
 273         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
 274         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
 275         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
 276
 277         s->input_frame->data[0]       = s->pict_bufs[3];
 278         s->input_frame->linesize[0]   = s->w;
 279         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
 280         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
 281         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
 282     }
 283
 284     s->min_strips = s->min_min_strips;
 285     s->max_strips = s->max_max_strips;
 286
 287 #ifdef CINEPAKENC_DEBUG
 288     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
 289 #endif
 290
 291     return 0;
 292
 293 enomem:
 294     av_frame_free(&s->last_frame);
 295     av_frame_free(&s->best_frame);
 296     av_frame_free(&s->scratch_frame);
 297     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 298         av_frame_free(&s->input_frame);
 299     av_freep(&s->codebook_input);
 300     av_freep(&s->codebook_closest);
 301     av_freep(&s->strip_buf);
 302     av_freep(&s->frame_buf);
 303     av_freep(&s->mb);
 304 #ifdef CINEPAKENC_DEBUG
 305     av_freep(&s->best_mb);
 306 #endif
 307
 308     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 309         av_freep(&s->pict_bufs[x]);
 310
 311     return AVERROR(ENOMEM);
 312 }
 313
 314 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
 315 #ifdef CINEPAK_REPORT_SERR
 316 , int64_t *serr
 317 #endif
 318 )
 319 {
 320     //score = FF_LAMBDA_SCALE * error + lambda * bits
 321     int x;
 322     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 323     int mb_count = s->w * h / MB_AREA;
 324     mb_info *mb;
 325     int64_t score1, score2, score3;
 326     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
 327                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
 328                    CHUNK_HEADER_SIZE) << 3;
 329
 330     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
 331
 332 #ifdef CINEPAK_REPORT_SERR
 333     *serr = 0;
 334 #endif
 335
 336     switch(info->mode) {
 337     case MODE_V1_ONLY:
 338         //one byte per MB
 339         ret += s->lambda * 8 * mb_count;
 340
 341 // while calculating we assume all blocks are ENC_V1
 342         for(x = 0; x < mb_count; x++) {
 343             mb = &s->mb[x];
 344             ret += FF_LAMBDA_SCALE * mb->v1_error;
 345 #ifdef CINEPAK_REPORT_SERR
 346             *serr += mb->v1_error;
 347 #endif
 348 // this function is never called for report in MODE_V1_ONLY
 349 //            if(!report)
 350             mb->best_encoding = ENC_V1;
 351         }
 352
 353         break;
 354     case MODE_V1_V4:
 355         //9 or 33 bits per MB
 356         if(report) {
 357 // no moves between the corresponding training sets are allowed
 358             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
 359             for(x = 0; x < mb_count; x++) {
 360                 int mberr;
 361                 mb = &s->mb[x];
 362                 if(mb->best_encoding == ENC_V1)
 363                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
 364                 else
 365                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
 366                 ret += score1;
 367 #ifdef CINEPAK_REPORT_SERR
 368                 *serr += mberr;
 369 #endif
 370             }
 371         } else { // find best mode per block
 372             for(x = 0; x < mb_count; x++) {
 373                 mb = &s->mb[x];
 374                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
 375                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
 376
 377                 if(score1 <= score2) {
 378                     ret += score1;
 379 #ifdef CINEPAK_REPORT_SERR
 380                     *serr += mb->v1_error;
 381 #endif
 382                     mb->best_encoding = ENC_V1;
 383                 } else {
 384                     ret += score2;
 385 #ifdef CINEPAK_REPORT_SERR
 386                     *serr += mb->v4_error;
 387 #endif
 388                     mb->best_encoding = ENC_V4;
 389                 }
 390             }
 391         }
 392
 393         break;
 394     case MODE_MC:
 395         //1, 10 or 34 bits per MB
 396         if(report) {
 397             int v1_shrunk = 0, v4_shrunk = 0;
 398             for(x = 0; x < mb_count; x++) {
 399                 mb = &s->mb[x];
 400 // it is OK to move blocks to ENC_SKIP here
 401 // but not to any codebook encoding!
 402                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 403                 if(mb->best_encoding == ENC_SKIP) {
 404                     ret += score1;
 405 #ifdef CINEPAK_REPORT_SERR
 406                     *serr += mb->skip_error;
 407 #endif
 408                 } else if(mb->best_encoding == ENC_V1) {
 409                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
 410                         mb->best_encoding = ENC_SKIP;
 411                         ++v1_shrunk;
 412                         ret += score1;
 413 #ifdef CINEPAK_REPORT_SERR
 414                         *serr += mb->skip_error;
 415 #endif
 416                     } else {
 417                         ret += score2;
 418 #ifdef CINEPAK_REPORT_SERR
 419                         *serr += mb->v1_error;
 420 #endif
 421                     }
 422                 } else {
 423                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
 424                         mb->best_encoding = ENC_SKIP;
 425                         ++v4_shrunk;
 426                         ret += score1;
 427 #ifdef CINEPAK_REPORT_SERR
 428                         *serr += mb->skip_error;
 429 #endif
 430                     } else {
 431                         ret += score3;
 432 #ifdef CINEPAK_REPORT_SERR
 433                         *serr += mb->v4_error;
 434 #endif
 435                     }
 436                 }
 437             }
 438             *training_set_v1_shrunk = v1_shrunk;
 439             *training_set_v4_shrunk = v4_shrunk;
 440         } else { // find best mode per block
 441             for(x = 0; x < mb_count; x++) {
 442                 mb = &s->mb[x];
 443                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 444                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
 445                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
 446
 447                 if(score1 <= score2 && score1 <= score3) {
 448                     ret += score1;
 449 #ifdef CINEPAK_REPORT_SERR
 450                     *serr += mb->skip_error;
 451 #endif
 452                     mb->best_encoding = ENC_SKIP;
 453                 } else if(score2 <= score3) {
 454                     ret += score2;
 455 #ifdef CINEPAK_REPORT_SERR
 456                     *serr += mb->v1_error;
 457 #endif
 458                     mb->best_encoding = ENC_V1;
 459                 } else {
 460                     ret += score3;
 461 #ifdef CINEPAK_REPORT_SERR
 462                     *serr += mb->v4_error;
 463 #endif
 464                     mb->best_encoding = ENC_V4;
 465                 }
 466             }
 467         }
 468
 469         break;
 470     }
 471
 472     return ret;
 473 }
 474
 475 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
 476 {
 477     buf[0] = chunk_type;
 478     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
 479     return CHUNK_HEADER_SIZE;
 480 }
 481
 482 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
 483 {
 484     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 485     int incremental_codebook_replacement_mode = 0; // hardcoded here,
 486                 // the compiler should notice that this is a constant -- rl
 487
 488     ret = write_chunk_header(buf,
 489           s->pix_fmt == AV_PIX_FMT_RGB24 ?
 490            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
 491            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
 492           entry_size * size
 493            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
 494
 495 // we do codebook encoding according to the "intra" mode
 496 // but we keep the "dead" code for reference in case we will want
 497 // to use incremental codebook updates (which actually would give us
 498 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
 499 // (of course, the code will be not useful as-is)
 500     if(incremental_codebook_replacement_mode) {
 501         int flags = 0;
 502         int flagsind;
 503         for(x = 0; x < size; x++) {
 504             if(flags == 0) {
 505                 flagsind = ret;
 506                 ret += 4;
 507                 flags = 0x80000000;
 508             } else
 509                 flags = ((flags>>1) | 0x80000000);
 510             for(y = 0; y < entry_size; y++)
 511                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 512             if((flags&0xffffffff) == 0xffffffff) {
 513                 AV_WB32(&buf[flagsind], flags);
 514                 flags = 0;
 515             }
 516         }
 517         if(flags)
 518             AV_WB32(&buf[flagsind], flags);
 519     } else
 520         for(x = 0; x < size; x++)
 521             for(y = 0; y < entry_size; y++)
 522                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 523
 524     return ret;
 525 }
 526
 527 //sets out to the sub picture starting at (x,y) in in
 528 static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
 529 {
 530     out->data[0] = in->data[0] + x + y * in->linesize[0];
 531     out->linesize[0] = in->linesize[0];
 532
 533     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 534         out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
 535         out->linesize[1] = in->linesize[1];
 536
 537         out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
 538         out->linesize[2] = in->linesize[2];
 539     }
 540 }
 541
 542 //decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
 543 static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
 544 {
 545     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 546
 547     sub_pict->data[0][0] =
 548             sub_pict->data[0][1] =
 549             sub_pict->data[0][    sub_pict->linesize[0]] =
 550             sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
 551
 552     sub_pict->data[0][2] =
 553             sub_pict->data[0][3] =
 554             sub_pict->data[0][2+  sub_pict->linesize[0]] =
 555             sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
 556
 557     sub_pict->data[0][2*sub_pict->linesize[0]] =
 558             sub_pict->data[0][1+2*sub_pict->linesize[0]] =
 559             sub_pict->data[0][  3*sub_pict->linesize[0]] =
 560             sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
 561
 562     sub_pict->data[0][2+2*sub_pict->linesize[0]] =
 563             sub_pict->data[0][3+2*sub_pict->linesize[0]] =
 564             sub_pict->data[0][2+3*sub_pict->linesize[0]] =
 565             sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
 566
 567     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 568         sub_pict->data[1][0] =
 569             sub_pict->data[1][1] =
 570             sub_pict->data[1][    sub_pict->linesize[1]] =
 571             sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
 572
 573         sub_pict->data[2][0] =
 574             sub_pict->data[2][1] =
 575             sub_pict->data[2][    sub_pict->linesize[2]] =
 576             sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
 577     }
 578 }
 579
 580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
 581 static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
 582 {
 583     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 584
 585     for(i = y = 0; y < 4; y += 2) {
 586         for(x = 0; x < 4; x += 2, i++) {
 587             sub_pict->data[0][x   +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
 588             sub_pict->data[0][x+1 +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
 589             sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
 590             sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
 591
 592             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 593                 sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
 594                 sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
 595             }
 596         }
 597     }
 598 }
 599
 600 static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
 601 {
 602     int y, p;
 603
 604     for(y = 0; y < MB_SIZE; y++) {
 605         memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
 606                MB_SIZE);
 607     }
 608
 609     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 610         for(p = 1; p <= 2; p++) {
 611             for(y = 0; y < MB_SIZE/2; y++) {
 612                 memcpy(a->data[p] + y*a->linesize[p],
 613                        b->data[p] + y*b->linesize[p],
 614                        MB_SIZE/2);
 615             }
 616         }
 617     }
 618 }
 619
 620 static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
 621 {
 622     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
 623     int needs_extra_bit, should_write_temp;
 624     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
 625     mb_info *mb;
 626     AVPicture sub_scratch = {{0}}, sub_last = {{0}};
 627
 628     //encode codebooks
 629 ////// MacOS vintage decoder compatibility dictates the presence of
 630 ////// the codebook chunk even when the codebook is empty - pretty dumb...
 631 ////// and also the certain order of the codebook chunks -- rl
 632     if(info->v4_size || !s->skip_empty_cb)
 633         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
 634
 635     if(info->v1_size || !s->skip_empty_cb)
 636         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
 637
 638     //update scratch picture
 639     for(z = y = 0; y < h; y += MB_SIZE) {
 640         for(x = 0; x < s->w; x += MB_SIZE, z++) {
 641             mb = &s->mb[z];
 642
 643             get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
 644
 645             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
 646                 get_sub_picture(s, x, y, last_pict, &sub_last);
 647                 copy_mb(s, &sub_scratch, &sub_last);
 648             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
 649                 decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
 650             else
 651                 decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
 652         }
 653     }
 654
 655     switch(info->mode) {
 656     case MODE_V1_ONLY:
 657         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
 658         ret += write_chunk_header(buf + ret, 0x32, mb_count);
 659
 660         for(x = 0; x < mb_count; x++)
 661             buf[ret++] = s->mb[x].v1_vector;
 662
 663         break;
 664     case MODE_V1_V4:
 665         //remember header position
 666         header_ofs = ret;
 667         ret += CHUNK_HEADER_SIZE;
 668
 669         for(x = 0; x < mb_count; x += 32) {
 670             flags = 0;
 671             for(y = x; y < FFMIN(x+32, mb_count); y++)
 672                 if(s->mb[y].best_encoding == ENC_V4)
 673                     flags |= 1 << (31 - y + x);
 674
 675             AV_WB32(&buf[ret], flags);
 676             ret += 4;
 677
 678             for(y = x; y < FFMIN(x+32, mb_count); y++) {
 679                 mb = &s->mb[y];
 680
 681                 if(mb->best_encoding == ENC_V1)
 682                     buf[ret++] = mb->v1_vector;
 683                 else
 684                     for(z = 0; z < 4; z++)
 685                         buf[ret++] = mb->v4_vector[z];
 686             }
 687         }
 688
 689         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
 690
 691         break;
 692     case MODE_MC:
 693         //remember header position
 694         header_ofs = ret;
 695         ret += CHUNK_HEADER_SIZE;
 696         flags = bits = temp_size = 0;
 697
 698         for(x = 0; x < mb_count; x++) {
 699             mb = &s->mb[x];
 700             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
 701             needs_extra_bit = 0;
 702             should_write_temp = 0;
 703
 704             if(mb->best_encoding != ENC_SKIP) {
 705                 if(bits < 32)
 706                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
 707                 else
 708                     needs_extra_bit = 1;
 709             }
 710
 711             if(bits == 32) {
 712                 AV_WB32(&buf[ret], flags);
 713                 ret += 4;
 714                 flags = bits = 0;
 715
 716                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
 717                     memcpy(&buf[ret], temp, temp_size);
 718                     ret += temp_size;
 719                     temp_size = 0;
 720                 } else
 721                     should_write_temp = 1;
 722             }
 723
 724             if(needs_extra_bit) {
 725                 flags = (mb->best_encoding == ENC_V4) << 31;
 726                 bits = 1;
 727             }
 728
 729             if(mb->best_encoding == ENC_V1)
 730                 temp[temp_size++] = mb->v1_vector;
 731             else if(mb->best_encoding == ENC_V4)
 732                 for(z = 0; z < 4; z++)
 733                     temp[temp_size++] = mb->v4_vector[z];
 734
 735             if(should_write_temp) {
 736                 memcpy(&buf[ret], temp, temp_size);
 737                 ret += temp_size;
 738                 temp_size = 0;
 739             }
 740         }
 741
 742         if(bits > 0) {
 743             AV_WB32(&buf[ret], flags);
 744             ret += 4;
 745             memcpy(&buf[ret], temp, temp_size);
 746             ret += temp_size;
 747         }
 748
 749         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
 750
 751         break;
 752     }
 753
 754     return ret;
 755 }
 756
 757 //computes distortion of 4x4 MB in b compared to a
 758 static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
 759 {
 760     int x, y, p, d, ret = 0;
 761
 762     for(y = 0; y < MB_SIZE; y++) {
 763         for(x = 0; x < MB_SIZE; x++) {
 764             d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
 765             ret += d*d;
 766         }
 767     }
 768
 769     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 770         for(p = 1; p <= 2; p++) {
 771             for(y = 0; y < MB_SIZE/2; y++) {
 772                 for(x = 0; x < MB_SIZE/2; x++) {
 773                     d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
 774                     ret += d*d;
 775                 }
 776             }
 777         }
 778     }
 779
 780     return ret;
 781 }
 782
 783 // return the possibly adjusted size of the codebook
 784 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
 785 static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
 786                     int v1mode, strip_info *info,
 787                     mb_encoding encoding)
 788 {
 789     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
 790     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 791     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
 792     int size = v1mode ? info->v1_size : info->v4_size;
 793     int64_t total_error = 0;
 794     uint8_t vq_pict_buf[(MB_AREA*3)/2];
 795     AVPicture sub_pict, vq_pict;
 796
 797     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
 798         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
 799             int *base;
 800
 801             if(CERTAIN(encoding)) {
 802 // use for the training only the blocks known to be to be encoded [sic:-]
 803                if(s->mb[mbn].best_encoding != encoding) continue;
 804             }
 805
 806             base = s->codebook_input + i*entry_size;
 807             if(v1mode) {
 808                 //subsample
 809                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
 810                     for(x2 = 0; x2 < 4; x2 += 2, j++) {
 811                         plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
 812                         shift = y2 < 4 ? 0 : 1;
 813                         x3 = shift ? 0 : x2;
 814                         y3 = shift ? 0 : y2;
 815                         base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
 816                                    pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
 817                                    pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
 818                                    pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
 819                     }
 820                 }
 821             } else {
 822                 //copy
 823                 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
 824                     for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
 825                         for(k = 0; k < entry_size; k++, j++) {
 826                             plane = k >= 4 ? k - 3 : 0;
 827
 828                             if(k >= 4) {
 829                                 x3 = (x+x2) >> 1;
 830                                 y3 = (y+y2) >> 1;
 831                             } else {
 832                                 x3 = x + x2 + (k & 1);
 833                                 y3 = y + y2 + (k >> 1);
 834                             }
 835
 836                             base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
 837                         }
 838                     }
 839                 }
 840             }
 841             i += v1mode ? 1 : 4;
 842         }
 843     }
 844 //    if(i < mbn*(v1mode ? 1 : 4)) {
 845 //        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
 846 //    }
 847
 848     if(i == 0) // empty training set, nothing to do
 849         return 0;
 850     if(i < size) {
 851         //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
 852         size = i;
 853     }
 854
 855     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 856     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 857
 858     //setup vq_pict, which contains a single MB
 859     vq_pict.data[0] = vq_pict_buf;
 860     vq_pict.linesize[0] = MB_SIZE;
 861     vq_pict.data[1] = &vq_pict_buf[MB_AREA];
 862     vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
 863     vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
 864
 865     //copy indices
 866     for(i = j = y = 0; y < h; y += MB_SIZE) {
 867         for(x = 0; x < s->w; x += MB_SIZE, j++) {
 868             mb_info *mb = &s->mb[j];
 869 // skip uninteresting blocks if we know their preferred encoding
 870             if(CERTAIN(encoding) && mb->best_encoding != encoding)
 871                 continue;
 872
 873             //point sub_pict to current MB
 874             get_sub_picture(s, x, y, pict, &sub_pict);
 875
 876             if(v1mode) {
 877                 mb->v1_vector = s->codebook_closest[i];
 878
 879                 //fill in vq_pict with V1 data
 880                 decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
 881
 882                 mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
 883                 total_error += mb->v1_error;
 884             } else {
 885                 for(k = 0; k < 4; k++)
 886                     mb->v4_vector[k] = s->codebook_closest[i+k];
 887
 888                 //fill in vq_pict with V4 data
 889                 decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
 890
 891                 mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
 892                 total_error += mb->v4_error;
 893             }
 894             i += v1mode ? 1 : 4;
 895         }
 896     }
 897 // check that we did it right in the beginning of the function
 898     av_assert0(i >= size); // training set is no smaller than the codebook
 899
 900     //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
 901
 902     return size;
 903 }
 904
 905 static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
 906 {
 907     int x, y, i;
 908     AVPicture sub_last, sub_pict;
 909
 910     for(i = y = 0; y < h; y += MB_SIZE) {
 911         for(x = 0; x < s->w; x += MB_SIZE, i++) {
 912             get_sub_picture(s, x, y, last_pict, &sub_last);
 913             get_sub_picture(s, x, y, pict,      &sub_pict);
 914
 915             s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
 916         }
 917     }
 918 }
 919
 920 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
 921 {
 922 // actually we are exclusively using intra strip coding (how much can we win
 923 // otherwise? how to choose which part of a codebook to update?),
 924 // keyframes are different only because we disallow ENC_SKIP on them -- rl
 925 // (besides, the logic here used to be inverted: )
 926 //    buf[0] = keyframe ? 0x11: 0x10;
 927     buf[0] = keyframe ? 0x10: 0x11;
 928     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
 929 //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
 930     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
 931     AV_WB16(&buf[6], 0);
 932 //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
 933     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
 934     AV_WB16(&buf[10], s->w);
 935     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 936 }
 937
 938 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
 939 #ifdef CINEPAK_REPORT_SERR
 940 , int64_t *best_serr
 941 #endif
 942 )
 943 {
 944     int64_t score = 0;
 945 #ifdef CINEPAK_REPORT_SERR
 946     int64_t serr;
 947 #endif
 948     int best_size = 0;
 949     strip_info info;
 950 // for codebook optimization:
 951     int v1enough, v1_size, v4enough, v4_size;
 952     int new_v1_size, new_v4_size;
 953     int v1shrunk, v4shrunk;
 954
 955     if(!keyframe)
 956         calculate_skip_errors(s, h, last_pict, pict, &info);
 957
 958     //try some powers of 4 for the size of the codebooks
 959     //constraint the v4 codebook to be no bigger than v1 one,
 960     //(and no less than v1_size/4)
 961     //thus making v1 preferable and possibly losing small details? should be ok
 962 #define SMALLEST_CODEBOOK 1
 963     for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
 964         for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
 965             //try all modes
 966             for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
 967                 //don't allow MODE_MC in intra frames
 968                 if(keyframe && mode == MODE_MC)
 969                     continue;
 970
 971                 if(mode == MODE_V1_ONLY) {
 972                     info.v1_size = v1_size;
 973 // the size may shrink even before optimizations if the input is short:
 974                     info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
 975                     if(info.v1_size < v1_size)
 976 // too few eligible blocks, no sense in trying bigger sizes
 977                         v1enough = 1;
 978
 979                     info.v4_size = 0;
 980                 } else { // mode != MODE_V1_ONLY
 981                     // if v4 codebook is empty then only allow V1-only mode
 982                     if(!v4_size)
 983                         continue;
 984
 985                     if(mode == MODE_V1_V4) {
 986                         info.v4_size = v4_size;
 987                         info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
 988                         if(info.v4_size < v4_size)
 989 // too few eligible blocks, no sense in trying bigger sizes
 990                             v4enough = 1;
 991                     }
 992                 }
 993
 994                 info.mode = mode;
 995 // choose the best encoding per block, based on current experience
 996                 score = calculate_mode_score(s, h, &info, 0,
 997                                              &v1shrunk, &v4shrunk
 998 #ifdef CINEPAK_REPORT_SERR
 999 , &serr
1000 #endif
1001 );
1002
1003                 if(mode != MODE_V1_ONLY){
1004                     int extra_iterations_limit = s->max_extra_cb_iterations;
1005 // recompute the codebooks, omitting the extra blocks
1006 // we assume we _may_ come here with more blocks to encode than before
1007                     info.v1_size = v1_size;
1008                     new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1009                     if(new_v1_size < info.v1_size){
1010                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1011                         info.v1_size = new_v1_size;
1012                     }
1013 // we assume we _may_ come here with more blocks to encode than before
1014                     info.v4_size = v4_size;
1015                     new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1016                     if(new_v4_size < info.v4_size) {
1017                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1018                         info.v4_size = new_v4_size;
1019                     }
1020 // calculate the resulting score
1021 // (do not move blocks to codebook encodings now, as some blocks may have
1022 // got bigger errors despite a smaller training set - but we do not
1023 // ever grow the training sets back)
1024                     for(;;) {
1025                         score = calculate_mode_score(s, h, &info, 1,
1026                                                      &v1shrunk, &v4shrunk
1027 #ifdef CINEPAK_REPORT_SERR
1028 , &serr
1029 #endif
1030 );
1031 // do we have a reason to reiterate? if so, have we reached the limit?
1032                         if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1033 // recompute the codebooks, omitting the extra blocks
1034                         if(v1shrunk) {
1035                             info.v1_size = v1_size;
1036                             new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1037                             if(new_v1_size < info.v1_size){
1038                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1039                                 info.v1_size = new_v1_size;
1040                             }
1041                         }
1042                         if(v4shrunk) {
1043                             info.v4_size = v4_size;
1044                             new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1045                             if(new_v4_size < info.v4_size) {
1046                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1047                                 info.v4_size = new_v4_size;
1048                             }
1049                         }
1050                     }
1051                 }
1052
1053                 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
1054
1055                 if(best_size == 0 || score < *best_score) {
1056
1057                     *best_score = score;
1058 #ifdef CINEPAK_REPORT_SERR
1059                     *best_serr = serr;
1060 #endif
1061                     best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
1062
1063                     //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
1064                     //av_log(s->avctx, AV_LOG_INFO, "\n");
1065 #ifdef CINEPAK_REPORT_SERR
1066                     av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
1067 #endif
1068
1069 #ifdef CINEPAKENC_DEBUG
1070                     //save MB encoding choices
1071                     memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1072 #endif
1073
1074                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1075                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1076
1077                 }
1078             }
1079         }
1080     }
1081
1082 #ifdef CINEPAKENC_DEBUG
1083     //gather stats. this will only work properly of MAX_STRIPS == 1
1084     if(best_info.mode == MODE_V1_ONLY) {
1085         s->num_v1_mode++;
1086         s->num_v1_encs += s->w*h/MB_AREA;
1087     } else {
1088         if(best_info.mode == MODE_V1_V4)
1089             s->num_v4_mode++;
1090         else
1091             s->num_mc_mode++;
1092
1093         int x;
1094         for(x = 0; x < s->w*h/MB_AREA; x++)
1095             if(s->best_mb[x].best_encoding == ENC_V1)
1096                 s->num_v1_encs++;
1097             else if(s->best_mb[x].best_encoding == ENC_V4)
1098                 s->num_v4_encs++;
1099             else
1100                 s->num_skips++;
1101     }
1102 #endif
1103
1104     best_size += STRIP_HEADER_SIZE;
1105     memcpy(buf, s->strip_buf, best_size);
1106
1107     return best_size;
1108 }
1109
1110 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1111 {
1112     buf[0] = isakeyframe ? 0 : 1;
1113     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1114     AV_WB16(&buf[4], s->w);
1115     AV_WB16(&buf[6], s->h);
1116     AV_WB16(&buf[8], num_strips);
1117
1118     return CVID_HEADER_SIZE;
1119 }
1120
1121 static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
1122 {
1123     int num_strips, strip, i, y, nexty, size, temp_size;
1124     AVPicture last_pict, pict, scratch_pict;
1125     int64_t best_score = 0, score, score_temp;
1126 #ifdef CINEPAK_REPORT_SERR
1127     int64_t best_serr = 0, serr, serr_temp;
1128 #endif
1129
1130     int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1131
1132     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1133         int x;
1134 // build a copy of the given frame in the correct colorspace
1135         for(y = 0; y < s->h; y += 2) {
1136             for(x = 0; x < s->w; x += 2) {
1137                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1138                 ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
1139                 ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
1140                 get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
1141                 r = g = b = 0;
1142                 for(i=0; i<4; ++i) {
1143                     int i1, i2;
1144                     i1 = (i&1); i2 = (i>=2);
1145                     rr = ir[i2][i1*3+0];
1146                     gg = ir[i2][i1*3+1];
1147                     bb = ir[i2][i1*3+2];
1148                     r += rr; g += gg; b += bb;
1149 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1150 // "Y"
1151 //                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1152                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1153                     if(      rr <   0) rr =   0;
1154                     else if (rr > 255) rr = 255;
1155                     scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
1156                 }
1157 // let us scale down as late as possible
1158 //                r /= 4; g /= 4; b /= 4;
1159 // "U"
1160 //                rr = -0.1429*r - 0.2857*g + 0.4286*b;
1161                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1162                 if(      rr < -128) rr = -128;
1163                 else if (rr >  127) rr =  127;
1164                 scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
1165 // "V"
1166 //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
1167                 rr = (748893*r - 599156*g - 149737*b) >> 23;
1168                 if(      rr < -128) rr = -128;
1169                 else if (rr >  127) rr =  127;
1170                 scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
1171             }
1172         }
1173     }
1174
1175     //would be nice but quite certainly incompatible with vintage players:
1176     // support encoding zero strips (meaning skip the whole frame)
1177     for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1178         score = 0;
1179         size = 0;
1180 #ifdef CINEPAK_REPORT_SERR
1181         serr = 0;
1182 #endif
1183
1184         for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1185             int strip_height;
1186
1187             nexty = strip * s->h / num_strips; // <= s->h
1188             //make nexty the next multiple of 4 if not already there
1189             if(nexty & 3)
1190                 nexty += 4 - (nexty & 3);
1191
1192             strip_height = nexty - y;
1193             if(strip_height <= 0) { // can this ever happen?
1194                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1195                 continue;
1196             }
1197
1198             if(s->pix_fmt == AV_PIX_FMT_RGB24)
1199                 get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
1200             else
1201                 get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
1202             get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
1203             get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
1204
1205             if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1206 #ifdef CINEPAK_REPORT_SERR
1207 , &serr_temp
1208 #endif
1209 )) < 0)
1210                 return temp_size;
1211
1212             score += score_temp;
1213 #ifdef CINEPAK_REPORT_SERR
1214             serr += serr_temp;
1215 #endif
1216             size += temp_size;
1217             //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1218             //av_log(s->avctx, AV_LOG_INFO, "\n");
1219         }
1220
1221         if(best_score == 0 || score < best_score) {
1222             best_score = score;
1223 #ifdef CINEPAK_REPORT_SERR
1224             best_serr = serr;
1225 #endif
1226             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1227             //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
1228 #ifdef CINEPAK_REPORT_SERR
1229             av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
1230 #endif
1231
1232             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1233             memcpy(buf, s->frame_buf, best_size);
1234             best_nstrips = num_strips;
1235         }
1236 // avoid trying too many strip numbers without a real reason
1237 // (this makes the processing of the very first frame faster)
1238         if(num_strips - best_nstrips > 4)
1239             break;
1240     }
1241
1242     av_assert0(best_nstrips >= 0 && best_size >= 0);
1243
1244 // let the number of strips slowly adapt to the changes in the contents,
1245 // compared to full bruteforcing every time this will occasionally lead
1246 // to some r/d performance loss but makes encoding up to several times faster
1247     if(!s->strip_number_delta_range) {
1248         if(best_nstrips == s->max_strips) { // let us try to step up
1249             s->max_strips = best_nstrips + 1;
1250             if(s->max_strips >= s->max_max_strips)
1251                 s->max_strips = s->max_max_strips;
1252         } else { // try to step down
1253             s->max_strips = best_nstrips;
1254         }
1255         s->min_strips = s->max_strips - 1;
1256         if(s->min_strips < s->min_min_strips)
1257             s->min_strips = s->min_min_strips;
1258     } else {
1259         s->max_strips = best_nstrips + s->strip_number_delta_range;
1260         if(s->max_strips >= s->max_max_strips)
1261             s->max_strips = s->max_max_strips;
1262         s->min_strips = best_nstrips - s->strip_number_delta_range;
1263         if(s->min_strips < s->min_min_strips)
1264             s->min_strips = s->min_min_strips;
1265     }
1266
1267     return best_size;
1268 }
1269
1270 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1271                                 const AVFrame *frame, int *got_packet)
1272 {
1273     CinepakEncContext *s = avctx->priv_data;
1274     int ret;
1275
1276     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1277
1278     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
1279         return ret;
1280     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1281     pkt->size = ret;
1282     if (s->curframe == 0)
1283         pkt->flags |= AV_PKT_FLAG_KEY;
1284     *got_packet = 1;
1285
1286     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1287
1288     if (++s->curframe >= s->keyint)
1289         s->curframe = 0;
1290
1291     return 0;
1292 }
1293
1294 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1295 {
1296     CinepakEncContext *s = avctx->priv_data;
1297     int x;
1298
1299     av_frame_free(&s->last_frame);
1300     av_frame_free(&s->best_frame);
1301     av_frame_free(&s->scratch_frame);
1302     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1303         av_frame_free(&s->input_frame);
1304     av_freep(&s->codebook_input);
1305     av_freep(&s->codebook_closest);
1306     av_freep(&s->strip_buf);
1307     av_freep(&s->frame_buf);
1308     av_freep(&s->mb);
1309 #ifdef CINEPAKENC_DEBUG
1310     av_freep(&s->best_mb);
1311 #endif
1312
1313     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1314         av_freep(&s->pict_bufs[x]);
1315
1316 #ifdef CINEPAKENC_DEBUG
1317     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1318         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1319 #endif
1320
1321     return 0;
1322 }
1323
1324 AVCodec ff_cinepak_encoder = {
1325     .name           = "cinepak",
1326     .type           = AVMEDIA_TYPE_VIDEO,
1327     .id             = AV_CODEC_ID_CINEPAK,
1328     .priv_data_size = sizeof(CinepakEncContext),
1329     .init           = cinepak_encode_init,
1330     .encode2        = cinepak_encode_frame,
1331     .close          = cinepak_encode_end,
1332     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1333     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1334     .priv_class     = &cinepak_class,
1335 };