2 * Lagarith lossless decoder
3 * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Lagarith lossless decoder
25 * @author Nathan Caldwell
33 #include "huffyuvdsp.h"
34 #include "lagarithrac.h"
/**
 * Frame type identifiers used by the Lagarith decoder.
 * The numeric values are fixed and must not be renumbered.
 */
enum LagarithFrameType {
    FRAME_RAW           =  1, /**< frame stored without compression */
    FRAME_U_RGB24       =  2, /**< RGB24, unaligned */
    FRAME_ARITH_YUY2    =  3, /**< YUY2, arithmetic coded */
    FRAME_ARITH_RGB24   =  4, /**< RGB24, arithmetic coded */
    FRAME_SOLID_GRAY    =  5, /**< whole frame is one grayscale value */
    FRAME_SOLID_COLOR   =  6, /**< whole frame is one non-grayscale color */
    FRAME_OLD_ARITH_RGB =  7, /**< obsolete arithmetic coded RGB
                                   (upstream stopped encoding it with version 1.1.0) */
    FRAME_ARITH_RGBA    =  8, /**< RGBA, arithmetic coded */
    FRAME_SOLID_RGBA    =  9, /**< whole frame is one RGBA color */
    FRAME_ARITH_YV12    = 10, /**< YV12, arithmetic coded */
    FRAME_REDUCED_RES   = 11, /**< YV12 frame at reduced resolution */
};
51 typedef struct LagarithContext
{
52 AVCodecContext
*avctx
;
53 HuffYUVDSPContext hdsp
;
54 int zeros
; /**< number of consecutive zero bytes encountered */
55 int zeros_rem
; /**< number of zero bytes remaining to output */
57 int rgb_planes_allocated
;
62 * Compute the 52bit mantissa of 1/(double)denom.
63 * This crazy format uses floats in an entropy coder and we have to match x86
64 * rounding exactly, thus ordinary floats aren't portable enough.
65 * @param denom denominator
66 * @return 52bit mantissa
69 static uint64_t softfloat_reciprocal(uint32_t denom
)
71 int shift
= av_log2(denom
- 1) + 1;
72 uint64_t ret
= (1ULL << 52) / denom
;
73 uint64_t err
= (1ULL << 52) - ret
* denom
;
77 return ret
+ err
/ denom
;
81 * (uint32_t)(x*f), where f has the given mantissa, and exponent 0
82 * Used in combination with softfloat_reciprocal computes x/(double)denom.
83 * @param x 32bit integer factor
84 * @param mantissa mantissa of f with exponent 0
85 * @return 32bit integer value (x*f)
86 * @see softfloat_reciprocal
88 static uint32_t softfloat_mul(uint32_t x
, uint64_t mantissa
)
90 uint64_t l
= x
* (mantissa
& 0xffffffff);
91 uint64_t h
= x
* (mantissa
>> 32);
94 l
+= 1 << av_log2(h
>> 21);
/**
 * Fold a signed 8-bit code into an unsigned zero-run length.
 *
 * Non-negative inputs map to 2*x and negative inputs map to -2*x - 1,
 * i.e. 0, -1, 1, -2, 2, ... become 0, 1, 2, 3, 4, ...
 * The result is truncated to 8 bits by the return type.
 *
 * @param x signed 8-bit input value
 * @return folded value as an unsigned byte
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* Multiply rather than shift: x is promoted to int, and left-shifting
     * a negative int is undefined behaviour in C. The product cannot
     * overflow because |x| <= 128.
     * x >> 7 is 0 for non-negative x and -1 (all ones) for negative x when
     * right shift of a negative value is arithmetic (implementation-defined),
     * so the XOR conditionally inverts the doubled value. */
    return (x * 2) ^ (x >> 7);
}
104 static int lag_decode_prob(GetBitContext
*gb
, uint32_t *value
)
106 static const uint8_t series
[] = { 1, 2, 3, 5, 8, 13, 21 };
113 for (i
= 0; i
< 7; i
++) {
122 if (bits
< 0 || bits
> 31) {
125 } else if (bits
== 0) {
130 val
= get_bits_long(gb
, bits
);
138 static int lag_read_prob_header(lag_rac
*rac
, GetBitContext
*gb
)
140 int i
, j
, scale_factor
;
141 unsigned prob
, cumulative_target
;
142 unsigned cumul_prob
= 0;
143 unsigned scaled_cumul_prob
= 0;
146 rac
->prob
[257] = UINT_MAX
;
147 /* Read probabilities from bitstream */
148 for (i
= 1; i
< 257; i
++) {
149 if (lag_decode_prob(gb
, &rac
->prob
[i
]) < 0) {
150 av_log(rac
->avctx
, AV_LOG_ERROR
, "Invalid probability encountered.\n");
153 if ((uint64_t)cumul_prob
+ rac
->prob
[i
] > UINT_MAX
) {
154 av_log(rac
->avctx
, AV_LOG_ERROR
, "Integer overflow encountered in cumulative probability calculation.\n");
157 cumul_prob
+= rac
->prob
[i
];
159 if (lag_decode_prob(gb
, &prob
)) {
160 av_log(rac
->avctx
, AV_LOG_ERROR
, "Invalid probability run encountered.\n");
165 for (j
= 0; j
< prob
; j
++)
171 av_log(rac
->avctx
, AV_LOG_ERROR
, "All probabilities are 0!\n");
175 /* Scale probabilities so cumulative probability is an even power of 2. */
176 scale_factor
= av_log2(cumul_prob
);
178 if (cumul_prob
& (cumul_prob
- 1)) {
179 uint64_t mul
= softfloat_reciprocal(cumul_prob
);
180 for (i
= 1; i
<= 128; i
++) {
181 rac
->prob
[i
] = softfloat_mul(rac
->prob
[i
], mul
);
182 scaled_cumul_prob
+= rac
->prob
[i
];
184 if (scaled_cumul_prob
<= 0) {
185 av_log(rac
->avctx
, AV_LOG_ERROR
, "Scaled probabilities invalid\n");
186 return AVERROR_INVALIDDATA
;
188 for (; i
< 257; i
++) {
189 rac
->prob
[i
] = softfloat_mul(rac
->prob
[i
], mul
);
190 scaled_cumul_prob
+= rac
->prob
[i
];
194 cumulative_target
= 1 << scale_factor
;
196 if (scaled_cumul_prob
> cumulative_target
) {
197 av_log(rac
->avctx
, AV_LOG_ERROR
,
198 "Scaled probabilities are larger than target!\n");
202 scaled_cumul_prob
= cumulative_target
- scaled_cumul_prob
;
204 for (i
= 1; scaled_cumul_prob
; i
= (i
& 0x7f) + 1) {
209 /* Comment from reference source:
210 * if (b & 0x80 == 0) { // order of operations is 'wrong'; it has been left this way
211 * // since the compression change is negligible and fixing it
212 * // breaks backwards compatibility
213 * b =- (signed int)b;
223 rac
->scale
= scale_factor
;
225 /* Fill probability array with cumulative probability for each symbol. */
226 for (i
= 1; i
< 257; i
++)
227 rac
->prob
[i
] += rac
->prob
[i
- 1];
232 static void add_lag_median_prediction(uint8_t *dst
, uint8_t *src1
,
233 uint8_t *diff
, int w
, int *left
,
236 /* This is almost identical to add_hfyu_median_pred in huffyuvdsp.h.
237 * However the &0xFF on the gradient predictor yields incorrect output
246 for (i
= 0; i
< w
; i
++) {
247 l
= mid_pred(l
, src1
[i
], l
+ src1
[i
] - lt
) + diff
[i
];
256 static void lag_pred_line(LagarithContext
*l
, uint8_t *buf
,
257 int width
, int stride
, int line
)
262 /* Left prediction only for first line */
263 L
= l
->hdsp
.add_hfyu_left_pred(buf
, buf
, width
, 0);
265 /* Left pixel is actually prev_row[width] */
266 L
= buf
[width
- stride
- 1];
269 /* Second line, left predict first pixel, the rest of the line is median predicted
270 * NOTE: In the case of RGB this pixel is top predicted */
271 TL
= l
->avctx
->pix_fmt
== AV_PIX_FMT_YUV420P
? buf
[-stride
] : L
;
273 /* Top left is 2 rows back, last pixel */
274 TL
= buf
[width
- (2 * stride
) - 1];
277 add_lag_median_prediction(buf
, buf
- stride
, buf
,
282 static void lag_pred_line_yuy2(LagarithContext
*l
, uint8_t *buf
,
283 int width
, int stride
, int line
,
292 l
->hdsp
.add_hfyu_left_pred(buf
, buf
, width
, 0);
298 const int HEAD
= is_luma
? 4 : 2;
301 L
= buf
[width
- stride
- 1];
302 TL
= buf
[HEAD
- stride
- 1];
303 for (i
= 0; i
< HEAD
; i
++) {
307 for (; i
< width
; i
++) {
308 L
= mid_pred(L
& 0xFF, buf
[i
- stride
], (L
+ buf
[i
- stride
] - TL
) & 0xFF) + buf
[i
];
309 TL
= buf
[i
- stride
];
313 TL
= buf
[width
- (2 * stride
) - 1];
314 L
= buf
[width
- stride
- 1];
315 l
->hdsp
.add_hfyu_median_pred(buf
, buf
- stride
, buf
, width
, &L
, &TL
);
319 static int lag_decode_line(LagarithContext
*l
, lag_rac
*rac
,
320 uint8_t *dst
, int width
, int stride
,
329 /* Output any zeros remaining from the previous run */
332 int count
= FFMIN(l
->zeros_rem
, width
- i
);
333 memset(dst
+ i
, 0, count
);
335 l
->zeros_rem
-= count
;
339 dst
[i
] = lag_get_rac(rac
);
348 if (l
->zeros
== esc_count
) {
349 int index
= lag_get_rac(rac
);
354 l
->zeros_rem
= lag_calc_zero_run(index
);
361 static int lag_decode_zero_run_line(LagarithContext
*l
, uint8_t *dst
,
362 const uint8_t *src
, const uint8_t *src_end
,
363 int width
, int esc_count
)
367 uint8_t zero_run
= 0;
368 const uint8_t *src_start
= src
;
369 uint8_t mask1
= -(esc_count
< 2);
370 uint8_t mask2
= -(esc_count
< 3);
371 uint8_t *end
= dst
+ (width
- 2);
373 avpriv_request_sample(l
->avctx
, "zero_run_line");
375 memset(dst
, 0, width
);
379 count
= FFMIN(l
->zeros_rem
, width
- i
);
380 if (end
- dst
< count
) {
381 av_log(l
->avctx
, AV_LOG_ERROR
, "Too many zeros remaining.\n");
382 return AVERROR_INVALIDDATA
;
385 memset(dst
, 0, count
);
386 l
->zeros_rem
-= count
;
392 while (!zero_run
&& dst
+ i
< end
) {
394 if (i
+2 >= src_end
- src
)
395 return AVERROR_INVALIDDATA
;
397 !(src
[i
] | (src
[i
+ 1] & mask1
) | (src
[i
+ 2] & mask2
));
404 l
->zeros_rem
= lag_calc_zero_run(src
[i
]);
414 return src
- src_start
;
419 static int lag_decode_arith_plane(LagarithContext
*l
, uint8_t *dst
,
420 int width
, int height
, int stride
,
421 const uint8_t *src
, int src_size
)
430 const uint8_t *src_end
= src
+ src_size
;
433 rac
.avctx
= l
->avctx
;
437 return AVERROR_INVALIDDATA
;
441 length
= width
* height
;
443 return AVERROR_INVALIDDATA
;
444 if (esc_count
&& AV_RL32(src
+ 1) < length
) {
445 length
= AV_RL32(src
+ 1);
449 if ((ret
= init_get_bits8(&gb
, src
+ offset
, src_size
- offset
)) < 0)
452 if (lag_read_prob_header(&rac
, &gb
) < 0)
455 ff_lag_rac_init(&rac
, &gb
, length
- stride
);
457 for (i
= 0; i
< height
; i
++)
458 read
+= lag_decode_line(l
, &rac
, dst
+ (i
* stride
), width
,
462 av_log(l
->avctx
, AV_LOG_WARNING
,
463 "Output more bytes than length (%d of %"PRIu32
")\n", read
,
465 } else if (esc_count
< 8) {
470 /* Zero run coding only, no range coding. */
471 for (i
= 0; i
< height
; i
++) {
472 int res
= lag_decode_zero_run_line(l
, dst
+ (i
* stride
), src
,
473 src_end
, width
, esc_count
);
479 if (src_size
< width
* height
)
480 return AVERROR_INVALIDDATA
; // buffer not big enough
481 /* Plane is stored uncompressed */
482 for (i
= 0; i
< height
; i
++) {
483 memcpy(dst
+ (i
* stride
), src
, width
);
487 } else if (esc_count
== 0xff) {
488 /* Plane is a solid run of given value */
489 for (i
= 0; i
< height
; i
++)
490 memset(dst
+ i
* stride
, src
[1], width
);
491 /* Do not apply prediction.
492 Note: memset to 0 above, setting first value to src[1]
493 and applying prediction gives the same result. */
496 av_log(l
->avctx
, AV_LOG_ERROR
,
497 "Invalid zero run escape code! (%#x)\n", esc_count
);
501 if (l
->avctx
->pix_fmt
!= AV_PIX_FMT_YUV422P
) {
502 for (i
= 0; i
< height
; i
++) {
503 lag_pred_line(l
, dst
, width
, stride
, i
);
507 for (i
= 0; i
< height
; i
++) {
508 lag_pred_line_yuy2(l
, dst
, width
, stride
, i
,
509 width
== l
->avctx
->width
);
519 * @param avctx codec context
520 * @param data output AVFrame
521 * @param got_frame set to 0 if no picture is returned, non-zero otherwise
522 * @param avpkt input packet
523 * @return number of consumed bytes on success or negative if decode fails
525 static int lag_decode_frame(AVCodecContext
*avctx
,
526 void *data
, int *got_frame
, AVPacket
*avpkt
)
528 const uint8_t *buf
= avpkt
->data
;
529 unsigned int buf_size
= avpkt
->size
;
530 LagarithContext
*l
= avctx
->priv_data
;
531 ThreadFrame frame
= { .f
= data
};
532 AVFrame
*const p
= data
;
533 uint8_t frametype
= 0;
534 uint32_t offset_gu
= 0, offset_bv
= 0, offset_ry
= 9;
536 uint8_t *srcs
[4], *dst
;
537 int i
, j
, planes
= 3;
544 offset_gu
= AV_RL32(buf
+ 1);
545 offset_bv
= AV_RL32(buf
+ 5);
548 case FRAME_SOLID_RGBA
:
549 avctx
->pix_fmt
= AV_PIX_FMT_RGB32
;
550 case FRAME_SOLID_GRAY
:
551 if (frametype
== FRAME_SOLID_GRAY
)
552 if (avctx
->bits_per_coded_sample
== 24) {
553 avctx
->pix_fmt
= AV_PIX_FMT_RGB24
;
555 avctx
->pix_fmt
= AV_PIX_FMT_0RGB32
;
559 if ((ret
= ff_thread_get_buffer(avctx
, &frame
, 0)) < 0)
563 if (frametype
== FRAME_SOLID_RGBA
) {
564 for (j
= 0; j
< avctx
->height
; j
++) {
565 for (i
= 0; i
< avctx
->width
; i
++)
566 AV_WN32(dst
+ i
* 4, offset_gu
);
567 dst
+= p
->linesize
[0];
570 for (j
= 0; j
< avctx
->height
; j
++) {
571 memset(dst
, buf
[1], avctx
->width
* planes
);
572 dst
+= p
->linesize
[0];
576 case FRAME_SOLID_COLOR
:
577 if (avctx
->bits_per_coded_sample
== 24) {
578 avctx
->pix_fmt
= AV_PIX_FMT_RGB24
;
580 avctx
->pix_fmt
= AV_PIX_FMT_RGB32
;
581 offset_gu
|= 0xFFU
<< 24;
584 if ((ret
= ff_thread_get_buffer(avctx
, &frame
,0)) < 0)
588 for (j
= 0; j
< avctx
->height
; j
++) {
589 for (i
= 0; i
< avctx
->width
; i
++)
590 if (avctx
->bits_per_coded_sample
== 24) {
591 AV_WB24(dst
+ i
* 3, offset_gu
);
593 AV_WN32(dst
+ i
* 4, offset_gu
);
595 dst
+= p
->linesize
[0];
598 case FRAME_ARITH_RGBA
:
599 avctx
->pix_fmt
= AV_PIX_FMT_RGB32
;
602 offs
[3] = AV_RL32(buf
+ 9);
603 case FRAME_ARITH_RGB24
:
605 if (frametype
== FRAME_ARITH_RGB24
|| frametype
== FRAME_U_RGB24
)
606 avctx
->pix_fmt
= AV_PIX_FMT_RGB24
;
608 if ((ret
= ff_thread_get_buffer(avctx
, &frame
, 0)) < 0)
615 l
->rgb_stride
= FFALIGN(avctx
->width
, 16);
616 av_fast_malloc(&l
->rgb_planes
, &l
->rgb_planes_allocated
,
617 l
->rgb_stride
* avctx
->height
* planes
+ 1);
618 if (!l
->rgb_planes
) {
619 av_log(avctx
, AV_LOG_ERROR
, "cannot allocate temporary buffer\n");
620 return AVERROR(ENOMEM
);
622 for (i
= 0; i
< planes
; i
++)
623 srcs
[i
] = l
->rgb_planes
+ (i
+ 1) * l
->rgb_stride
* avctx
->height
- l
->rgb_stride
;
624 for (i
= 0; i
< planes
; i
++)
625 if (buf_size
<= offs
[i
]) {
626 av_log(avctx
, AV_LOG_ERROR
,
627 "Invalid frame offsets\n");
628 return AVERROR_INVALIDDATA
;
631 for (i
= 0; i
< planes
; i
++)
632 lag_decode_arith_plane(l
, srcs
[i
],
633 avctx
->width
, avctx
->height
,
634 -l
->rgb_stride
, buf
+ offs
[i
],
637 for (i
= 0; i
< planes
; i
++)
638 srcs
[i
] = l
->rgb_planes
+ i
* l
->rgb_stride
* avctx
->height
;
639 for (j
= 0; j
< avctx
->height
; j
++) {
640 for (i
= 0; i
< avctx
->width
; i
++) {
647 if (frametype
== FRAME_ARITH_RGBA
) {
649 AV_WN32(dst
+ i
* 4, MKBETAG(a
, r
, g
, b
));
656 dst
+= p
->linesize
[0];
657 for (i
= 0; i
< planes
; i
++)
658 srcs
[i
] += l
->rgb_stride
;
661 case FRAME_ARITH_YUY2
:
662 avctx
->pix_fmt
= AV_PIX_FMT_YUV422P
;
664 if ((ret
= ff_thread_get_buffer(avctx
, &frame
, 0)) < 0)
667 if (offset_ry
>= buf_size
||
668 offset_gu
>= buf_size
||
669 offset_bv
>= buf_size
) {
670 av_log(avctx
, AV_LOG_ERROR
,
671 "Invalid frame offsets\n");
672 return AVERROR_INVALIDDATA
;
675 lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
676 p
->linesize
[0], buf
+ offset_ry
,
677 buf_size
- offset_ry
);
678 lag_decode_arith_plane(l
, p
->data
[1], (avctx
->width
+ 1) / 2,
679 avctx
->height
, p
->linesize
[1],
680 buf
+ offset_gu
, buf_size
- offset_gu
);
681 lag_decode_arith_plane(l
, p
->data
[2], (avctx
->width
+ 1) / 2,
682 avctx
->height
, p
->linesize
[2],
683 buf
+ offset_bv
, buf_size
- offset_bv
);
685 case FRAME_ARITH_YV12
:
686 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
688 if ((ret
= ff_thread_get_buffer(avctx
, &frame
, 0)) < 0)
690 if (buf_size
<= offset_ry
|| buf_size
<= offset_gu
|| buf_size
<= offset_bv
) {
691 return AVERROR_INVALIDDATA
;
694 if (offset_ry
>= buf_size
||
695 offset_gu
>= buf_size
||
696 offset_bv
>= buf_size
) {
697 av_log(avctx
, AV_LOG_ERROR
,
698 "Invalid frame offsets\n");
699 return AVERROR_INVALIDDATA
;
702 lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
703 p
->linesize
[0], buf
+ offset_ry
,
704 buf_size
- offset_ry
);
705 lag_decode_arith_plane(l
, p
->data
[2], (avctx
->width
+ 1) / 2,
706 (avctx
->height
+ 1) / 2, p
->linesize
[2],
707 buf
+ offset_gu
, buf_size
- offset_gu
);
708 lag_decode_arith_plane(l
, p
->data
[1], (avctx
->width
+ 1) / 2,
709 (avctx
->height
+ 1) / 2, p
->linesize
[1],
710 buf
+ offset_bv
, buf_size
- offset_bv
);
713 av_log(avctx
, AV_LOG_ERROR
,
714 "Unsupported Lagarith frame type: %#"PRIx8
"\n", frametype
);
715 return AVERROR_PATCHWELCOME
;
723 static av_cold
int lag_decode_init(AVCodecContext
*avctx
)
725 LagarithContext
*l
= avctx
->priv_data
;
728 ff_huffyuvdsp_init(&l
->hdsp
);
733 static av_cold
int lag_decode_end(AVCodecContext
*avctx
)
735 LagarithContext
*l
= avctx
->priv_data
;
737 av_freep(&l
->rgb_planes
);
742 AVCodec ff_lagarith_decoder
= {
744 .long_name
= NULL_IF_CONFIG_SMALL("Lagarith lossless"),
745 .type
= AVMEDIA_TYPE_VIDEO
,
746 .id
= AV_CODEC_ID_LAGARITH
,
747 .priv_data_size
= sizeof(LagarithContext
),
748 .init
= lag_decode_init
,
749 .close
= lag_decode_end
,
750 .decode
= lag_decode_frame
,
751 .capabilities
= CODEC_CAP_DR1
| CODEC_CAP_FRAME_THREADS
,