4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
5 * Copyright (C) 2004-2007 Eric Lasota
6 * Based on RoQ specs (C) 2001 Tim Ferguson
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
28 * Switchblade3 FFmpeg glue by Eric Lasota.
36 * CODEBOOK - 2 + 8 bits
37 * SUBDIVIDE - 2 + combined subcel cost
42 * CODEBOOK - 2 + 8 bits
43 * SUBDIVIDE - 2 + 4*8 bits
45 * Maximum cost: 138 bits per cel
47 * Proper evaluation requires LCD fraction comparison, which requires
48 * Squared Error (SE) loss * savings increase
50 * Maximum savings increase: 136 bits
51 * Maximum SE loss without overflow: 31580641
52 * Components in 8x8 supercel: 192
53 * Maximum SE precision per component: 164482
54 * >65025, so no truncation is needed (phew)
59 #include "libavutil/attributes.h"
60 #include "libavutil/opt.h"
62 #include "bytestream.h"
70 * Maximum number of generated 4x4 codebooks. When quake3_compat is set only MAX_CBS_4x4-1 entries are generated, to work around a known limitation of the Quake 3 decoder.
73 #define MAX_CBS_4x4 256
75 #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.
77 /* The cast is useful when multiplying it by INT_MAX */
78 #define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
80 /* Macroblock support functions */
81 static void unpack_roq_cell(roq_cell
*cell
, uint8_t u
[4*3])
83 memcpy(u
, cell
->y
, 4);
84 memset(u
+4, cell
->u
, 4);
85 memset(u
+8, cell
->v
, 4);
88 static void unpack_roq_qcell(uint8_t cb2
[], roq_qcell
*qcell
, uint8_t u
[4*4*3])
91 static const int offsets
[4] = {0, 2, 8, 10};
93 for (cp
=0; cp
<3; cp
++)
95 u
[4*4*cp
+ offsets
[i
] ] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
];
96 u
[4*4*cp
+ offsets
[i
]+1] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+1];
97 u
[4*4*cp
+ offsets
[i
]+4] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+2];
98 u
[4*4*cp
+ offsets
[i
]+5] = cb2
[qcell
->idx
[i
]*2*2*3 + 4*cp
+3];
/**
 * Upscale a planar 4x4 block to 8x8 by duplicating every sample 2x2.
 *
 * @param base source block, three consecutive 4x4 planes (3*16 bytes)
 * @param u    destination block, three consecutive 8x8 planes (3*64 bytes)
 */
static void enlarge_roq_mb4(const uint8_t base[3*16], uint8_t u[3*64])
{
    int x, y, cp;

    for (cp = 0; cp < 3; cp++)
        for (y = 0; y < 8; y++)
            for (x = 0; x < 8; x++)
                *u++ = base[(y/2)*4 + (x/2) + 16*cp];
}
/** Return x squared. */
static inline int square(int x)
{
    return x*x;
}

/**
 * Sum of squared differences between two byte sequences.
 *
 * @param a     first sequence
 * @param b     second sequence
 * @param count number of bytes compared; values <= 0 yield 0
 * @return sum over i of (b[i] - a[i])^2
 */
static inline int eval_sse(const uint8_t *a, const uint8_t *b, int count)
{
    int diff = 0;
    int i;

    for (i = 0; i < count; i++)
        diff += square(b[i] - a[i]);

    return diff;
}
128 // FIXME Could use DSPContext.sse, but it is not so speed critical (used
129 // just for motion estimation).
130 static int block_sse(uint8_t * const *buf1
, uint8_t * const *buf2
, int x1
, int y1
,
131 int x2
, int y2
, const int *stride1
, const int *stride2
, int size
)
136 for (k
=0; k
<3; k
++) {
137 int bias
= (k
? CHROMA_BIAS
: 4);
138 for (i
=0; i
<size
; i
++)
139 sse
+= bias
*eval_sse(buf1
[k
] + (y1
+i
)*stride1
[k
] + x1
,
140 buf2
[k
] + (y2
+i
)*stride2
[k
] + x2
, size
);
146 static int eval_motion_dist(RoqContext
*enc
, int x
, int y
, motion_vect vect
,
152 if (mx
< -7 || mx
> 7)
155 if (my
< -7 || my
> 7)
161 if ((unsigned) mx
> enc
->width
-size
|| (unsigned) my
> enc
->height
-size
)
164 return block_sse(enc
->frame_to_enc
->data
, enc
->last_frame
->data
, x
, y
,
166 enc
->frame_to_enc
->linesize
, enc
->last_frame
->linesize
,
171 * @return distortion between two macroblocks
173 static inline int squared_diff_macroblock(uint8_t a
[], uint8_t b
[], int size
)
177 for(cp
=0;cp
<3;cp
++) {
178 int bias
= (cp
? CHROMA_BIAS
: 4);
179 sdiff
+= bias
*eval_sse(a
, b
, size
*size
);
203 SubcelEvaluation subCels
[4];
208 int sourceX
, sourceY
;
215 int usedCB2
[MAX_CBS_2x2
];
216 int usedCB4
[MAX_CBS_4x4
];
217 uint8_t unpacked_cb2
[MAX_CBS_2x2
*2*2*3];
218 uint8_t unpacked_cb4
[MAX_CBS_4x4
*4*4*3];
219 uint8_t unpacked_cb4_enlarged
[MAX_CBS_4x4
*8*8*3];
225 typedef struct RoqTempData
227 CelEvaluation
*cel_evals
;
229 int f2i4
[MAX_CBS_4x4
];
230 int i2f4
[MAX_CBS_4x4
];
231 int f2i2
[MAX_CBS_2x2
];
232 int i2f2
[MAX_CBS_2x2
];
239 RoqCodebooks codebooks
;
246 * Initialize cel evaluators and set their source coordinates
248 static void create_cel_evals(RoqContext
*enc
, RoqTempdata
*tempData
)
252 tempData
->cel_evals
= av_malloc_array(enc
->width
*enc
->height
/64, sizeof(CelEvaluation
));
254 /* Map to the ROQ quadtree order */
255 for (y
=0; y
<enc
->height
; y
+=16)
256 for (x
=0; x
<enc
->width
; x
+=16)
258 tempData
->cel_evals
[n
].sourceX
= x
+ (i
&1)*8;
259 tempData
->cel_evals
[n
++].sourceY
= y
+ (i
&2)*4;
264 * Get macroblocks from parts of the image
266 static void get_frame_mb(const AVFrame
*frame
, int x
, int y
, uint8_t mb
[], int dim
)
270 for (cp
=0; cp
<3; cp
++) {
271 int stride
= frame
->linesize
[cp
];
272 for (i
=0; i
<dim
; i
++)
273 for (j
=0; j
<dim
; j
++)
274 *mb
++ = frame
->data
[cp
][(y
+i
)*stride
+ x
+ j
];
/**
 * Find the codebook with the lowest distortion from an image.
 *
 * @param cluster  planar macroblock to match (3*dim*dim bytes)
 * @param cb       unpacked codebook, dim*dim*3 bytes per entry
 * @param numCB    number of entries in cb
 * @param outIndex receives the index of the best entry (0 if numCB <= 0)
 * @param dim      width and height of the macroblock
 * @return distortion of the best entry, INT_MAX if no entry was examined
 */
static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
                    int *outIndex, int dim)
{
    int i, lDiff = INT_MAX, pick = 0;

    /* Diff against the others */
    for (i = 0; i < numCB; i++) {
        int diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim);
        if (diff < lDiff) {
            lDiff = diff;
            pick  = i;
        }
    }

    *outIndex = pick;
    return lDiff;
}
299 #define EVAL_MOTION(MOTION) \
301 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
303 if (diff < lowestdiff) { \
309 static void motion_search(RoqContext
*enc
, int blocksize
)
311 static const motion_vect offsets
[8] = {
322 int diff
, lowestdiff
, oldbest
;
324 motion_vect bestpick
= {{0,0}};
327 motion_vect
*last_motion
;
328 motion_vect
*this_motion
;
329 motion_vect vect
, vect2
;
331 int max
=(enc
->width
/blocksize
)*enc
->height
/blocksize
;
333 if (blocksize
== 4) {
334 last_motion
= enc
->last_motion4
;
335 this_motion
= enc
->this_motion4
;
337 last_motion
= enc
->last_motion8
;
338 this_motion
= enc
->this_motion8
;
341 for (i
=0; i
<enc
->height
; i
+=blocksize
)
342 for (j
=0; j
<enc
->width
; j
+=blocksize
) {
343 lowestdiff
= eval_motion_dist(enc
, j
, i
, (motion_vect
) {{0,0}},
349 EVAL_MOTION(enc
->this_motion8
[(i
/8)*(enc
->width
/8) + j
/8]);
351 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
352 if (offset
< max
&& offset
>= 0)
353 EVAL_MOTION(last_motion
[offset
]);
356 if (offset
< max
&& offset
>= 0)
357 EVAL_MOTION(last_motion
[offset
]);
359 offset
= (i
/blocksize
+ 1)*enc
->width
/blocksize
+ j
/blocksize
;
360 if (offset
< max
&& offset
>= 0)
361 EVAL_MOTION(last_motion
[offset
]);
363 off
[0]= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
- 1;
364 off
[1]= off
[0] - enc
->width
/blocksize
+ 1;
370 vect
.d
[k
]= mid_pred(this_motion
[off
[0]].d
[k
],
371 this_motion
[off
[1]].d
[k
],
372 this_motion
[off
[2]].d
[k
]);
376 EVAL_MOTION(this_motion
[off
[k
]]);
378 EVAL_MOTION(this_motion
[off
[0]]);
383 while (oldbest
!= lowestdiff
) {
384 oldbest
= lowestdiff
;
385 for (k
=0; k
<8; k
++) {
387 vect2
.d
[0] += offsets
[k
].d
[0];
388 vect2
.d
[1] += offsets
[k
].d
[1];
393 offset
= (i
/blocksize
)*enc
->width
/blocksize
+ j
/blocksize
;
394 this_motion
[offset
] = bestpick
;
399 * Get distortion for all options available to a subcel
401 static void gather_data_for_subcel(SubcelEvaluation
*subcel
, int x
,
402 int y
, RoqContext
*enc
, RoqTempdata
*tempData
)
409 static const int bitsUsed
[4] = {2, 10, 10, 34};
411 if (enc
->framesSinceKeyframe
>= 1) {
412 subcel
->motion
= enc
->this_motion4
[y
*enc
->width
/16 + x
/4];
414 subcel
->eval_dist
[RoQ_ID_FCC
] =
415 eval_motion_dist(enc
, x
, y
,
416 enc
->this_motion4
[y
*enc
->width
/16 + x
/4], 4);
418 subcel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
420 if (enc
->framesSinceKeyframe
>= 2)
421 subcel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
422 enc
->current_frame
->data
, x
,
424 enc
->frame_to_enc
->linesize
,
425 enc
->current_frame
->linesize
,
428 subcel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
430 cluster_index
= y
*enc
->width
/16 + x
/4;
432 get_frame_mb(enc
->frame_to_enc
, x
, y
, mb4
, 4);
434 subcel
->eval_dist
[RoQ_ID_SLD
] = index_mb(mb4
,
435 tempData
->codebooks
.unpacked_cb4
,
436 tempData
->codebooks
.numCB4
,
437 &subcel
->cbEntry
, 4);
439 subcel
->eval_dist
[RoQ_ID_CCC
] = 0;
442 subcel
->subCels
[i
] = tempData
->closest_cb2
[cluster_index
*4+i
];
444 get_frame_mb(enc
->frame_to_enc
, x
+2*(i
&1),
447 subcel
->eval_dist
[RoQ_ID_CCC
] +=
448 squared_diff_macroblock(tempData
->codebooks
.unpacked_cb2
+ subcel
->subCels
[i
]*2*2*3, mb2
, 2);
453 if (ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
455 subcel
->best_coding
= i
;
456 subcel
->best_bit_use
= bitsUsed
[i
];
457 best_dist
= ROQ_LAMBDA_SCALE
*subcel
->eval_dist
[i
] +
458 enc
->lambda
*bitsUsed
[i
];
463 * Get distortion for all options available to a cel
465 static void gather_data_for_cel(CelEvaluation
*cel
, RoqContext
*enc
,
466 RoqTempdata
*tempData
)
469 int index
= cel
->sourceY
*enc
->width
/64 + cel
->sourceX
/8;
470 int i
, j
, best_dist
, divide_bit_use
;
472 int bitsUsed
[4] = {2, 10, 10, 0};
474 if (enc
->framesSinceKeyframe
>= 1) {
475 cel
->motion
= enc
->this_motion8
[index
];
477 cel
->eval_dist
[RoQ_ID_FCC
] =
478 eval_motion_dist(enc
, cel
->sourceX
, cel
->sourceY
,
479 enc
->this_motion8
[index
], 8);
481 cel
->eval_dist
[RoQ_ID_FCC
] = INT_MAX
;
483 if (enc
->framesSinceKeyframe
>= 2)
484 cel
->eval_dist
[RoQ_ID_MOT
] = block_sse(enc
->frame_to_enc
->data
,
485 enc
->current_frame
->data
,
486 cel
->sourceX
, cel
->sourceY
,
487 cel
->sourceX
, cel
->sourceY
,
488 enc
->frame_to_enc
->linesize
,
489 enc
->current_frame
->linesize
,8);
491 cel
->eval_dist
[RoQ_ID_MOT
] = INT_MAX
;
493 get_frame_mb(enc
->frame_to_enc
, cel
->sourceX
, cel
->sourceY
, mb8
, 8);
495 cel
->eval_dist
[RoQ_ID_SLD
] =
496 index_mb(mb8
, tempData
->codebooks
.unpacked_cb4_enlarged
,
497 tempData
->codebooks
.numCB4
, &cel
->cbEntry
, 8);
499 gather_data_for_subcel(cel
->subCels
+ 0, cel
->sourceX
+0, cel
->sourceY
+0, enc
, tempData
);
500 gather_data_for_subcel(cel
->subCels
+ 1, cel
->sourceX
+4, cel
->sourceY
+0, enc
, tempData
);
501 gather_data_for_subcel(cel
->subCels
+ 2, cel
->sourceX
+0, cel
->sourceY
+4, enc
, tempData
);
502 gather_data_for_subcel(cel
->subCels
+ 3, cel
->sourceX
+4, cel
->sourceY
+4, enc
, tempData
);
504 cel
->eval_dist
[RoQ_ID_CCC
] = 0;
506 for (i
=0; i
<4; i
++) {
507 cel
->eval_dist
[RoQ_ID_CCC
] +=
508 cel
->subCels
[i
].eval_dist
[cel
->subCels
[i
].best_coding
];
509 divide_bit_use
+= cel
->subCels
[i
].best_bit_use
;
513 bitsUsed
[3] = 2 + divide_bit_use
;
516 if (ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] + enc
->lambda
*bitsUsed
[i
] <
518 cel
->best_coding
= i
;
519 best_dist
= ROQ_LAMBDA_SCALE
*cel
->eval_dist
[i
] +
520 enc
->lambda
*bitsUsed
[i
];
523 tempData
->used_option
[cel
->best_coding
]++;
524 tempData
->mainChunkSize
+= bitsUsed
[cel
->best_coding
];
526 if (cel
->best_coding
== RoQ_ID_SLD
)
527 tempData
->codebooks
.usedCB4
[cel
->cbEntry
]++;
529 if (cel
->best_coding
== RoQ_ID_CCC
)
530 for (i
=0; i
<4; i
++) {
531 if (cel
->subCels
[i
].best_coding
== RoQ_ID_SLD
)
532 tempData
->codebooks
.usedCB4
[cel
->subCels
[i
].cbEntry
]++;
533 else if (cel
->subCels
[i
].best_coding
== RoQ_ID_CCC
)
535 tempData
->codebooks
.usedCB2
[cel
->subCels
[i
].subCels
[j
]]++;
539 static void remap_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
543 /* Make remaps for the final codebook usage */
544 for (i
=0; i
<(enc
->quake3_compat
? MAX_CBS_4x4
-1 : MAX_CBS_4x4
); i
++) {
545 if (tempData
->codebooks
.usedCB4
[i
]) {
546 tempData
->i2f4
[i
] = idx
;
547 tempData
->f2i4
[idx
] = i
;
549 tempData
->codebooks
.usedCB2
[enc
->cb4x4
[i
].idx
[j
]]++;
554 tempData
->numCB4
= idx
;
557 for (i
=0; i
<MAX_CBS_2x2
; i
++) {
558 if (tempData
->codebooks
.usedCB2
[i
]) {
559 tempData
->i2f2
[i
] = idx
;
560 tempData
->f2i2
[idx
] = i
;
564 tempData
->numCB2
= idx
;
569 * Write codebook chunk
571 static void write_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
574 uint8_t **outp
= &enc
->out_buf
;
576 if (tempData
->numCB2
) {
577 bytestream_put_le16(outp
, RoQ_QUAD_CODEBOOK
);
578 bytestream_put_le32(outp
, tempData
->numCB2
*6 + tempData
->numCB4
*4);
579 bytestream_put_byte(outp
, tempData
->numCB4
);
580 bytestream_put_byte(outp
, tempData
->numCB2
);
582 for (i
=0; i
<tempData
->numCB2
; i
++) {
583 bytestream_put_buffer(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].y
, 4);
584 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].u
);
585 bytestream_put_byte(outp
, enc
->cb2x2
[tempData
->f2i2
[i
]].v
);
588 for (i
=0; i
<tempData
->numCB4
; i
++)
590 bytestream_put_byte(outp
, tempData
->i2f2
[enc
->cb4x4
[tempData
->f2i4
[i
]].idx
[j
]]);
595 static inline uint8_t motion_arg(motion_vect mot
)
597 uint8_t ax
= 8 - ((uint8_t) mot
.d
[0]);
598 uint8_t ay
= 8 - ((uint8_t) mot
.d
[1]);
599 return ((ax
&15)<<4) | (ay
&15);
606 uint8_t argumentSpool
[64];
611 /* NOTE: Typecodes must be spooled AFTER arguments!! */
612 static void write_typecode(CodingSpool
*s
, uint8_t type
)
614 s
->typeSpool
|= (type
& 3) << (14 - s
->typeSpoolLength
);
615 s
->typeSpoolLength
+= 2;
616 if (s
->typeSpoolLength
== 16) {
617 bytestream_put_le16(s
->pout
, s
->typeSpool
);
618 bytestream_put_buffer(s
->pout
, s
->argumentSpool
,
619 s
->args
- s
->argumentSpool
);
620 s
->typeSpoolLength
= 0;
622 s
->args
= s
->argumentSpool
;
626 static void reconstruct_and_encode_image(RoqContext
*enc
, RoqTempdata
*tempData
, int w
, int h
, int numBlocks
)
639 spool
.typeSpoolLength
=0;
640 spool
.args
= spool
.argumentSpool
;
641 spool
.pout
= &enc
->out_buf
;
643 if (tempData
->used_option
[RoQ_ID_CCC
]%2)
644 tempData
->mainChunkSize
+=8; //FIXME
646 /* Write the video chunk header */
647 bytestream_put_le16(&enc
->out_buf
, RoQ_QUAD_VQ
);
648 bytestream_put_le32(&enc
->out_buf
, tempData
->mainChunkSize
/8);
649 bytestream_put_byte(&enc
->out_buf
, 0x0);
650 bytestream_put_byte(&enc
->out_buf
, 0x0);
652 for (i
=0; i
<numBlocks
; i
++) {
653 eval
= tempData
->cel_evals
+ i
;
657 dist
+= eval
->eval_dist
[eval
->best_coding
];
659 switch (eval
->best_coding
) {
661 write_typecode(&spool
, RoQ_ID_MOT
);
665 bytestream_put_byte(&spool
.args
, motion_arg(eval
->motion
));
667 write_typecode(&spool
, RoQ_ID_FCC
);
668 ff_apply_motion_8x8(enc
, x
, y
,
669 eval
->motion
.d
[0], eval
->motion
.d
[1]);
673 bytestream_put_byte(&spool
.args
, tempData
->i2f4
[eval
->cbEntry
]);
674 write_typecode(&spool
, RoQ_ID_SLD
);
676 qcell
= enc
->cb4x4
+ eval
->cbEntry
;
677 ff_apply_vector_4x4(enc
, x
, y
, enc
->cb2x2
+ qcell
->idx
[0]);
678 ff_apply_vector_4x4(enc
, x
+4, y
, enc
->cb2x2
+ qcell
->idx
[1]);
679 ff_apply_vector_4x4(enc
, x
, y
+4, enc
->cb2x2
+ qcell
->idx
[2]);
680 ff_apply_vector_4x4(enc
, x
+4, y
+4, enc
->cb2x2
+ qcell
->idx
[3]);
684 write_typecode(&spool
, RoQ_ID_CCC
);
686 for (j
=0; j
<4; j
++) {
690 switch(eval
->subCels
[j
].best_coding
) {
695 bytestream_put_byte(&spool
.args
,
696 motion_arg(eval
->subCels
[j
].motion
));
698 ff_apply_motion_4x4(enc
, subX
, subY
,
699 eval
->subCels
[j
].motion
.d
[0],
700 eval
->subCels
[j
].motion
.d
[1]);
704 bytestream_put_byte(&spool
.args
,
705 tempData
->i2f4
[eval
->subCels
[j
].cbEntry
]);
707 qcell
= enc
->cb4x4
+ eval
->subCels
[j
].cbEntry
;
709 ff_apply_vector_2x2(enc
, subX
, subY
,
710 enc
->cb2x2
+ qcell
->idx
[0]);
711 ff_apply_vector_2x2(enc
, subX
+2, subY
,
712 enc
->cb2x2
+ qcell
->idx
[1]);
713 ff_apply_vector_2x2(enc
, subX
, subY
+2,
714 enc
->cb2x2
+ qcell
->idx
[2]);
715 ff_apply_vector_2x2(enc
, subX
+2, subY
+2,
716 enc
->cb2x2
+ qcell
->idx
[3]);
720 for (k
=0; k
<4; k
++) {
721 int cb_idx
= eval
->subCels
[j
].subCels
[k
];
722 bytestream_put_byte(&spool
.args
,
723 tempData
->i2f2
[cb_idx
]);
725 ff_apply_vector_2x2(enc
, subX
+ 2*(k
&1), subY
+ (k
&2),
726 enc
->cb2x2
+ cb_idx
);
730 write_typecode(&spool
, eval
->subCels
[j
].best_coding
);
736 /* Flush the remainder of the argument/type spool */
737 while (spool
.typeSpoolLength
)
738 write_typecode(&spool
, 0x0);
741 uint8_t *fdata
[3] = {enc
->frame_to_enc
->data
[0],
742 enc
->frame_to_enc
->data
[1],
743 enc
->frame_to_enc
->data
[2]};
744 uint8_t *cdata
[3] = {enc
->current_frame
->data
[0],
745 enc
->current_frame
->data
[1],
746 enc
->current_frame
->data
[2]};
747 av_log(enc
->avctx
, AV_LOG_ERROR
, "Expected distortion: %i Actual: %i\n",
749 block_sse(fdata
, cdata
, 0, 0, 0, 0,
750 enc
->frame_to_enc
->linesize
,
751 enc
->current_frame
->linesize
,
752 enc
->width
)); //WARNING: Square dimensions implied...
758 * Create a single YUV cell from a 2x2 section of the image
760 static inline void frame_block_to_cell(uint8_t *block
, uint8_t * const *data
,
761 int top
, int left
, const int *stride
)
766 for (j
=0; j
<2; j
++) {
767 int x
= (top
+i
)*stride
[0] + left
+ j
;
768 *block
++ = data
[0][x
];
769 x
= (top
+i
)*stride
[1] + left
+ j
;
779 * Create YUV clusters for the entire image
781 static void create_clusters(const AVFrame
*frame
, int w
, int h
, uint8_t *yuvClusters
)
786 for (j
=0; j
<w
; j
+=4) {
787 for (k
=0; k
< 2; k
++)
788 for (l
=0; l
< 2; l
++)
789 frame_block_to_cell(yuvClusters
+ (l
+ 2*k
)*6, frame
->data
,
790 i
+2*k
, j
+2*l
, frame
->linesize
);
795 static void generate_codebook(RoqContext
*enc
, RoqTempdata
*tempdata
,
796 int *points
, int inputCount
, roq_cell
*results
,
797 int size
, int cbsize
)
800 int c_size
= size
*size
/4;
802 int *codebook
= av_malloc_array(6*c_size
, cbsize
*sizeof(int));
806 closest_cb
= av_malloc_array(6*c_size
, inputCount
*sizeof(int));
808 closest_cb
= tempdata
->closest_cb2
;
810 avpriv_init_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
811 avpriv_do_elbg(points
, 6*c_size
, inputCount
, codebook
, cbsize
, 1, closest_cb
, &enc
->randctx
);
817 for (i
=0; i
<cbsize
; i
++)
818 for (k
=0; k
<c_size
; k
++) {
820 results
->y
[j
] = *buf
++;
822 results
->u
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
823 results
->v
= (*buf
++ + CHROMA_BIAS
/2)/CHROMA_BIAS
;
830 static void generate_new_codebooks(RoqContext
*enc
, RoqTempdata
*tempData
)
833 RoqCodebooks
*codebooks
= &tempData
->codebooks
;
834 int max
= enc
->width
*enc
->height
/16;
836 roq_cell
*results4
= av_malloc(sizeof(roq_cell
)*MAX_CBS_4x4
*4);
837 uint8_t *yuvClusters
=av_malloc_array(max
, sizeof(int)*6*4);
838 int *points
= av_malloc_array(max
, 6*4*sizeof(int));
841 /* Subsample YUV data */
842 create_clusters(enc
->frame_to_enc
, enc
->width
, enc
->height
, yuvClusters
);
844 /* Cast to integer and apply chroma bias */
845 for (i
=0; i
<max
*24; i
++) {
846 bias
= ((i
%6)<4) ? 1 : CHROMA_BIAS
;
847 points
[i
] = bias
*yuvClusters
[i
];
850 /* Create 4x4 codebooks */
851 generate_codebook(enc
, tempData
, points
, max
, results4
, 4, (enc
->quake3_compat
? MAX_CBS_4x4
-1 : MAX_CBS_4x4
));
853 codebooks
->numCB4
= (enc
->quake3_compat
? MAX_CBS_4x4
-1 : MAX_CBS_4x4
);
855 tempData
->closest_cb2
= av_malloc_array(max
, 4*sizeof(int));
857 /* Create 2x2 codebooks */
858 generate_codebook(enc
, tempData
, points
, max
*4, enc
->cb2x2
, 2, MAX_CBS_2x2
);
860 codebooks
->numCB2
= MAX_CBS_2x2
;
862 /* Unpack 2x2 codebook clusters */
863 for (i
=0; i
<codebooks
->numCB2
; i
++)
864 unpack_roq_cell(enc
->cb2x2
+ i
, codebooks
->unpacked_cb2
+ i
*2*2*3);
866 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
867 for (i
=0; i
<codebooks
->numCB4
; i
++) {
868 for (j
=0; j
<4; j
++) {
869 unpack_roq_cell(&results4
[4*i
+ j
], mb2
);
870 index_mb(mb2
, codebooks
->unpacked_cb2
, codebooks
->numCB2
,
871 &enc
->cb4x4
[i
].idx
[j
], 2);
873 unpack_roq_qcell(codebooks
->unpacked_cb2
, enc
->cb4x4
+ i
,
874 codebooks
->unpacked_cb4
+ i
*4*4*3);
875 enlarge_roq_mb4(codebooks
->unpacked_cb4
+ i
*4*4*3,
876 codebooks
->unpacked_cb4_enlarged
+ i
*8*8*3);
879 av_free(yuvClusters
);
884 static int roq_encode_video(RoqContext
*enc
)
886 RoqTempdata
*tempData
= enc
->tmpData
;
889 memset(tempData
, 0, sizeof(*tempData
));
891 create_cel_evals(enc
, tempData
);
893 generate_new_codebooks(enc
, tempData
);
895 if (enc
->framesSinceKeyframe
>= 1) {
896 motion_search(enc
, 8);
897 motion_search(enc
, 4);
901 for (i
=0; i
<enc
->width
*enc
->height
/64; i
++)
902 gather_data_for_cel(tempData
->cel_evals
+ i
, enc
, tempData
);
904 /* Quake 3 can't handle chunks bigger than 65535 bytes */
905 if (tempData
->mainChunkSize
/8 > 65535 && enc
->quake3_compat
) {
906 if (enc
->lambda
> 100000) {
907 av_log(enc
->avctx
, AV_LOG_ERROR
, "Cannot encode video in Quake compatible form\n");
908 return AVERROR(EINVAL
);
910 av_log(enc
->avctx
, AV_LOG_ERROR
,
911 "Warning, generated a frame too big for Quake (%d > 65535), "
912 "now switching to a bigger qscale value.\n",
913 tempData
->mainChunkSize
/8);
915 tempData
->mainChunkSize
= 0;
916 memset(tempData
->used_option
, 0, sizeof(tempData
->used_option
));
917 memset(tempData
->codebooks
.usedCB4
, 0,
918 sizeof(tempData
->codebooks
.usedCB4
));
919 memset(tempData
->codebooks
.usedCB2
, 0,
920 sizeof(tempData
->codebooks
.usedCB2
));
925 remap_codebooks(enc
, tempData
);
927 write_codebooks(enc
, tempData
);
929 reconstruct_and_encode_image(enc
, tempData
, enc
->width
, enc
->height
,
930 enc
->width
*enc
->height
/64);
932 enc
->avctx
->coded_frame
= enc
->current_frame
;
934 /* Rotate frame history */
935 FFSWAP(AVFrame
*, enc
->current_frame
, enc
->last_frame
);
936 FFSWAP(motion_vect
*, enc
->last_motion4
, enc
->this_motion4
);
937 FFSWAP(motion_vect
*, enc
->last_motion8
, enc
->this_motion8
);
939 av_freep(&tempData
->cel_evals
);
940 av_freep(&tempData
->closest_cb2
);
942 enc
->framesSinceKeyframe
++;
947 static av_cold
int roq_encode_end(AVCodecContext
*avctx
)
949 RoqContext
*enc
= avctx
->priv_data
;
951 av_frame_free(&enc
->current_frame
);
952 av_frame_free(&enc
->last_frame
);
954 av_freep(&enc
->tmpData
);
955 av_freep(&enc
->this_motion4
);
956 av_freep(&enc
->last_motion4
);
957 av_freep(&enc
->this_motion8
);
958 av_freep(&enc
->last_motion8
);
963 static av_cold
int roq_encode_init(AVCodecContext
*avctx
)
965 RoqContext
*enc
= avctx
->priv_data
;
967 av_lfg_init(&enc
->randctx
, 1);
969 enc
->framesSinceKeyframe
= 0;
970 if ((avctx
->width
& 0xf) || (avctx
->height
& 0xf)) {
971 av_log(avctx
, AV_LOG_ERROR
, "Dimensions must be divisible by 16\n");
972 return AVERROR(EINVAL
);
975 if (avctx
->width
> 65535 || avctx
->height
> 65535) {
976 av_log(avctx
, AV_LOG_ERROR
, "Dimensions are max %d\n", enc
->quake3_compat
? 32768 : 65535);
977 return AVERROR(EINVAL
);
980 if (((avctx
->width
)&(avctx
->width
-1))||((avctx
->height
)&(avctx
->height
-1)))
981 av_log(avctx
, AV_LOG_ERROR
, "Warning: dimensions not power of two, this is not supported by quake\n");
983 enc
->width
= avctx
->width
;
984 enc
->height
= avctx
->height
;
986 enc
->framesSinceKeyframe
= 0;
987 enc
->first_frame
= 1;
989 enc
->last_frame
= av_frame_alloc();
990 enc
->current_frame
= av_frame_alloc();
991 if (!enc
->last_frame
|| !enc
->current_frame
) {
992 roq_encode_end(avctx
);
993 return AVERROR(ENOMEM
);
996 enc
->tmpData
= av_malloc(sizeof(RoqTempdata
));
999 av_mallocz_array((enc
->width
*enc
->height
/16), sizeof(motion_vect
));
1002 av_malloc_array ((enc
->width
*enc
->height
/16), sizeof(motion_vect
));
1005 av_mallocz_array((enc
->width
*enc
->height
/64), sizeof(motion_vect
));
1008 av_malloc_array ((enc
->width
*enc
->height
/64), sizeof(motion_vect
));
1013 static void roq_write_video_info_chunk(RoqContext
*enc
)
1015 /* ROQ info chunk */
1016 bytestream_put_le16(&enc
->out_buf
, RoQ_INFO
);
1019 bytestream_put_le32(&enc
->out_buf
, 8);
1021 /* Unused argument */
1022 bytestream_put_byte(&enc
->out_buf
, 0x00);
1023 bytestream_put_byte(&enc
->out_buf
, 0x00);
1026 bytestream_put_le16(&enc
->out_buf
, enc
->width
);
1029 bytestream_put_le16(&enc
->out_buf
, enc
->height
);
1031 /* Unused in Quake 3, mimics the output of the real encoder */
1032 bytestream_put_byte(&enc
->out_buf
, 0x08);
1033 bytestream_put_byte(&enc
->out_buf
, 0x00);
1034 bytestream_put_byte(&enc
->out_buf
, 0x04);
1035 bytestream_put_byte(&enc
->out_buf
, 0x00);
1038 static int roq_encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
1039 const AVFrame
*frame
, int *got_packet
)
1041 RoqContext
*enc
= avctx
->priv_data
;
1046 enc
->frame_to_enc
= frame
;
1049 enc
->lambda
= frame
->quality
- 1;
1051 enc
->lambda
= 2*ROQ_LAMBDA_SCALE
;
1053 /* 138 bits max per 8x8 block +
1054 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
1055 size
= ((enc
->width
* enc
->height
/ 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
1056 if ((ret
= ff_alloc_packet2(avctx
, pkt
, size
)) < 0)
1058 enc
->out_buf
= pkt
->data
;
1060 /* Check for I frame */
1061 if (enc
->framesSinceKeyframe
== avctx
->gop_size
)
1062 enc
->framesSinceKeyframe
= 0;
1064 if (enc
->first_frame
) {
1065 /* Alloc memory for the reconstruction data (we must know the stride
1067 if ((ret
= ff_get_buffer(avctx
, enc
->current_frame
, 0)) < 0 ||
1068 (ret
= ff_get_buffer(avctx
, enc
->last_frame
, 0)) < 0)
1071 /* Before the first video frame, write a "video info" chunk */
1072 roq_write_video_info_chunk(enc
);
1074 enc
->first_frame
= 0;
1077 /* Encode the actual frame */
1078 if ((ret
= roq_encode_video(enc
)) < 0)
1081 pkt
->size
= enc
->out_buf
- pkt
->data
;
1082 if (enc
->framesSinceKeyframe
== 1)
1083 pkt
->flags
|= AV_PKT_FLAG_KEY
;
1089 #define OFFSET(x) offsetof(RoqContext, x)
1090 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1091 static const AVOption options
[] = {
1092 { "quake3_compat", "Whether to respect known limitations in Quake 3 decoder", OFFSET(quake3_compat
), AV_OPT_TYPE_INT
, { .i64
= 1 }, 0, 1, VE
},
1096 static const AVClass roq_class
= {
1097 .class_name
= "RoQ",
1098 .item_name
= av_default_item_name
,
1100 .version
= LIBAVUTIL_VERSION_INT
,
1103 AVCodec ff_roq_encoder
= {
1105 .long_name
= NULL_IF_CONFIG_SMALL("id RoQ video"),
1106 .type
= AVMEDIA_TYPE_VIDEO
,
1107 .id
= AV_CODEC_ID_ROQ
,
1108 .priv_data_size
= sizeof(RoqContext
),
1109 .init
= roq_encode_init
,
1110 .encode2
= roq_encode_frame
,
1111 .close
= roq_encode_end
,
1112 .pix_fmts
= (const enum AVPixelFormat
[]){ AV_PIX_FMT_YUVJ444P
,
1114 .priv_class
= &roq_class
,