3 * Copyright (c) 2012 Andrew D'Addesio
4 * Copyright (c) 2013-2014 Mozilla Corporation
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Andrew D'Addesio, Anton Khirnov
28 * Codec homepage: http://opus-codec.org/
29 * Specification: http://tools.ietf.org/html/rfc6716
30 * Ogg Opus specification: https://tools.ietf.org/html/draft-ietf-codec-oggopus-03
32 * Ogg-contained .opus files can be produced with opus-tools:
33 * http://git.xiph.org/?p=opus-tools.git
38 #include "libavutil/attributes.h"
39 #include "libavutil/audio_fifo.h"
40 #include "libavutil/channel_layout.h"
41 #include "libavutil/opt.h"
43 #include "libswresample/swresample.h"
46 #include "celp_filters.h"
53 static const uint16_t silk_frame_duration_ms
[16] = {
61 /* number of samples of silence to feed to the resampler
63 static const int silk_resample_delay
[] = {
67 static const uint8_t celt_band_end
[] = { 13, 17, 17, 19, 21 };
69 static int get_silk_samplerate(int config
)
81 static int opus_rc_init(OpusRangeCoder
*rc
, const uint8_t *data
, int size
)
83 int ret
= init_get_bits8(&rc
->gb
, data
, size
);
88 rc
->value
= 127 - get_bits(&rc
->gb
, 7);
89 rc
->total_read_bits
= 9;
90 opus_rc_normalize(rc
);
95 static void opus_raw_init(OpusRangeCoder
*rc
, const uint8_t *rightend
,
98 rc
->rb
.position
= rightend
;
104 static void opus_fade(float *out
,
105 const float *in1
, const float *in2
,
106 const float *window
, int len
)
109 for (i
= 0; i
< len
; i
++)
110 out
[i
] = in2
[i
] * window
[i
] + in1
[i
] * (1.0 - window
[i
]);
113 static int opus_flush_resample(OpusStreamContext
*s
, int nb_samples
)
115 int celt_size
= av_audio_fifo_size(s
->celt_delay
);
117 ret
= swr_convert(s
->swr
,
118 (uint8_t**)s
->out
, nb_samples
,
122 else if (ret
!= nb_samples
) {
123 av_log(s
->avctx
, AV_LOG_ERROR
, "Wrong number of flushed samples: %d\n",
129 if (celt_size
!= nb_samples
) {
130 av_log(s
->avctx
, AV_LOG_ERROR
, "Wrong number of CELT delay samples.\n");
133 av_audio_fifo_read(s
->celt_delay
, (void**)s
->celt_output
, nb_samples
);
134 for (i
= 0; i
< s
->output_channels
; i
++) {
135 s
->fdsp
->vector_fmac_scalar(s
->out
[i
],
136 s
->celt_output
[i
], 1.0,
141 if (s
->redundancy_idx
) {
142 for (i
= 0; i
< s
->output_channels
; i
++)
143 opus_fade(s
->out
[i
], s
->out
[i
],
144 s
->redundancy_output
[i
] + 120 + s
->redundancy_idx
,
145 ff_celt_window2
+ s
->redundancy_idx
, 120 - s
->redundancy_idx
);
146 s
->redundancy_idx
= 0;
149 s
->out
[0] += nb_samples
;
150 s
->out
[1] += nb_samples
;
151 s
->out_size
-= nb_samples
* sizeof(float);
156 static int opus_init_resample(OpusStreamContext
*s
)
158 static const float delay
[16] = { 0.0 };
159 const uint8_t *delayptr
[2] = { (uint8_t*)delay
, (uint8_t*)delay
};
162 av_opt_set_int(s
->swr
, "in_sample_rate", s
->silk_samplerate
, 0);
163 ret
= swr_init(s
->swr
);
165 av_log(s
->avctx
, AV_LOG_ERROR
, "Error opening the resampler.\n");
169 ret
= swr_convert(s
->swr
,
171 delayptr
, silk_resample_delay
[s
->packet
.bandwidth
]);
173 av_log(s
->avctx
, AV_LOG_ERROR
,
174 "Error feeding initial silence to the resampler.\n");
181 static int opus_decode_redundancy(OpusStreamContext
*s
, const uint8_t *data
, int size
)
184 enum OpusBandwidth bw
= s
->packet
.bandwidth
;
186 if (s
->packet
.mode
== OPUS_MODE_SILK
&&
187 bw
== OPUS_BANDWIDTH_MEDIUMBAND
)
188 bw
= OPUS_BANDWIDTH_WIDEBAND
;
190 ret
= opus_rc_init(&s
->redundancy_rc
, data
, size
);
193 opus_raw_init(&s
->redundancy_rc
, data
+ size
, size
);
195 ret
= ff_celt_decode_frame(s
->celt
, &s
->redundancy_rc
,
196 s
->redundancy_output
,
197 s
->packet
.stereo
+ 1, 240,
198 0, celt_band_end
[s
->packet
.bandwidth
]);
204 av_log(s
->avctx
, AV_LOG_ERROR
, "Error decoding the redundancy frame.\n");
208 static int opus_decode_frame(OpusStreamContext
*s
, const uint8_t *data
, int size
)
210 int samples
= s
->packet
.frame_duration
;
212 int redundancy_size
, redundancy_pos
;
213 int ret
, i
, consumed
;
214 int delayed_samples
= s
->delayed_samples
;
216 ret
= opus_rc_init(&s
->rc
, data
, size
);
220 /* decode the silk frame */
221 if (s
->packet
.mode
== OPUS_MODE_SILK
|| s
->packet
.mode
== OPUS_MODE_HYBRID
) {
222 if (!swr_is_initialized(s
->swr
)) {
223 ret
= opus_init_resample(s
);
228 samples
= ff_silk_decode_superframe(s
->silk
, &s
->rc
, s
->silk_output
,
229 FFMIN(s
->packet
.bandwidth
, OPUS_BANDWIDTH_WIDEBAND
),
230 s
->packet
.stereo
+ 1,
231 silk_frame_duration_ms
[s
->packet
.config
]);
233 av_log(s
->avctx
, AV_LOG_ERROR
, "Error decoding a SILK frame.\n");
236 samples
= swr_convert(s
->swr
,
237 (uint8_t**)s
->out
, s
->packet
.frame_duration
,
238 (const uint8_t**)s
->silk_output
, samples
);
240 av_log(s
->avctx
, AV_LOG_ERROR
, "Error resampling SILK data.\n");
243 av_assert2((samples
& 7) == 0);
244 s
->delayed_samples
+= s
->packet
.frame_duration
- samples
;
246 ff_silk_flush(s
->silk
);
248 // decode redundancy information
249 consumed
= opus_rc_tell(&s
->rc
);
250 if (s
->packet
.mode
== OPUS_MODE_HYBRID
&& consumed
+ 37 <= size
* 8)
251 redundancy
= opus_rc_p2model(&s
->rc
, 12);
252 else if (s
->packet
.mode
== OPUS_MODE_SILK
&& consumed
+ 17 <= size
* 8)
256 redundancy_pos
= opus_rc_p2model(&s
->rc
, 1);
258 if (s
->packet
.mode
== OPUS_MODE_HYBRID
)
259 redundancy_size
= opus_rc_unimodel(&s
->rc
, 256) + 2;
261 redundancy_size
= size
- (consumed
+ 7) / 8;
262 size
-= redundancy_size
;
264 av_log(s
->avctx
, AV_LOG_ERROR
, "Invalid redundancy frame size.\n");
265 return AVERROR_INVALIDDATA
;
268 if (redundancy_pos
) {
269 ret
= opus_decode_redundancy(s
, data
+ size
, redundancy_size
);
272 ff_celt_flush(s
->celt
);
276 /* decode the CELT frame */
277 if (s
->packet
.mode
== OPUS_MODE_CELT
|| s
->packet
.mode
== OPUS_MODE_HYBRID
) {
278 float *out_tmp
[2] = { s
->out
[0], s
->out
[1] };
279 float **dst
= (s
->packet
.mode
== OPUS_MODE_CELT
) ?
280 out_tmp
: s
->celt_output
;
281 int celt_output_samples
= samples
;
282 int delay_samples
= av_audio_fifo_size(s
->celt_delay
);
285 if (s
->packet
.mode
== OPUS_MODE_HYBRID
) {
286 av_audio_fifo_read(s
->celt_delay
, (void**)s
->celt_output
, delay_samples
);
288 for (i
= 0; i
< s
->output_channels
; i
++) {
289 s
->fdsp
->vector_fmac_scalar(out_tmp
[i
], s
->celt_output
[i
], 1.0,
291 out_tmp
[i
] += delay_samples
;
293 celt_output_samples
-= delay_samples
;
295 av_log(s
->avctx
, AV_LOG_WARNING
,
296 "Spurious CELT delay samples present.\n");
297 av_audio_fifo_drain(s
->celt_delay
, delay_samples
);
298 if (s
->avctx
->err_recognition
& AV_EF_EXPLODE
)
303 opus_raw_init(&s
->rc
, data
+ size
, size
);
305 ret
= ff_celt_decode_frame(s
->celt
, &s
->rc
, dst
,
306 s
->packet
.stereo
+ 1,
307 s
->packet
.frame_duration
,
308 (s
->packet
.mode
== OPUS_MODE_HYBRID
) ? 17 : 0,
309 celt_band_end
[s
->packet
.bandwidth
]);
313 if (s
->packet
.mode
== OPUS_MODE_HYBRID
) {
314 int celt_delay
= s
->packet
.frame_duration
- celt_output_samples
;
315 void *delaybuf
[2] = { s
->celt_output
[0] + celt_output_samples
,
316 s
->celt_output
[1] + celt_output_samples
};
318 for (i
= 0; i
< s
->output_channels
; i
++) {
319 s
->fdsp
->vector_fmac_scalar(out_tmp
[i
],
320 s
->celt_output
[i
], 1.0,
321 celt_output_samples
);
324 ret
= av_audio_fifo_write(s
->celt_delay
, delaybuf
, celt_delay
);
329 ff_celt_flush(s
->celt
);
331 if (s
->redundancy_idx
) {
332 for (i
= 0; i
< s
->output_channels
; i
++)
333 opus_fade(s
->out
[i
], s
->out
[i
],
334 s
->redundancy_output
[i
] + 120 + s
->redundancy_idx
,
335 ff_celt_window2
+ s
->redundancy_idx
, 120 - s
->redundancy_idx
);
336 s
->redundancy_idx
= 0;
339 if (!redundancy_pos
) {
340 ff_celt_flush(s
->celt
);
341 ret
= opus_decode_redundancy(s
, data
+ size
, redundancy_size
);
345 for (i
= 0; i
< s
->output_channels
; i
++) {
346 opus_fade(s
->out
[i
] + samples
- 120 + delayed_samples
,
347 s
->out
[i
] + samples
- 120 + delayed_samples
,
348 s
->redundancy_output
[i
] + 120,
349 ff_celt_window2
, 120 - delayed_samples
);
351 s
->redundancy_idx
= 120 - delayed_samples
;
354 for (i
= 0; i
< s
->output_channels
; i
++) {
355 memcpy(s
->out
[i
] + delayed_samples
, s
->redundancy_output
[i
], 120 * sizeof(float));
356 opus_fade(s
->out
[i
] + 120 + delayed_samples
,
357 s
->redundancy_output
[i
] + 120,
358 s
->out
[i
] + 120 + delayed_samples
,
359 ff_celt_window2
, 120);
367 static int opus_decode_subpacket(OpusStreamContext
*s
,
368 const uint8_t *buf
, int buf_size
,
371 int output_samples
= 0;
372 int flush_needed
= 0;
375 /* check if we need to flush the resampler */
376 if (swr_is_initialized(s
->swr
)) {
378 int64_t cur_samplerate
;
379 av_opt_get_int(s
->swr
, "in_sample_rate", 0, &cur_samplerate
);
380 flush_needed
= (s
->packet
.mode
== OPUS_MODE_CELT
) || (cur_samplerate
!= s
->silk_samplerate
);
382 flush_needed
= !!s
->delayed_samples
;
386 if (!buf
&& !flush_needed
)
389 /* use dummy output buffers if the channel is not mapped to anything */
391 (s
->output_channels
== 2 && !s
->out
[1])) {
392 av_fast_malloc(&s
->out_dummy
, &s
->out_dummy_allocated_size
, s
->out_size
);
394 return AVERROR(ENOMEM
);
396 s
->out
[0] = s
->out_dummy
;
398 s
->out
[1] = s
->out_dummy
;
401 /* flush the resampler if necessary */
403 ret
= opus_flush_resample(s
, s
->delayed_samples
);
405 av_log(s
->avctx
, AV_LOG_ERROR
, "Error flushing the resampler.\n");
409 output_samples
+= s
->delayed_samples
;
410 s
->delayed_samples
= 0;
416 /* decode all the frames in the packet */
417 for (i
= 0; i
< s
->packet
.frame_count
; i
++) {
418 int size
= s
->packet
.frame_size
[i
];
419 int samples
= opus_decode_frame(s
, buf
+ s
->packet
.frame_offset
[i
], size
);
422 av_log(s
->avctx
, AV_LOG_ERROR
, "Error decoding an Opus frame.\n");
423 if (s
->avctx
->err_recognition
& AV_EF_EXPLODE
)
426 for (j
= 0; j
< s
->output_channels
; j
++)
427 memset(s
->out
[j
], 0, s
->packet
.frame_duration
* sizeof(float));
428 samples
= s
->packet
.frame_duration
;
430 output_samples
+= samples
;
432 for (j
= 0; j
< s
->output_channels
; j
++)
433 s
->out
[j
] += samples
;
434 s
->out_size
-= samples
* sizeof(float);
438 s
->out
[0] = s
->out
[1] = NULL
;
441 return output_samples
;
444 static int opus_decode_packet(AVCodecContext
*avctx
, void *data
,
445 int *got_frame_ptr
, AVPacket
*avpkt
)
447 OpusContext
*c
= avctx
->priv_data
;
448 AVFrame
*frame
= data
;
449 const uint8_t *buf
= avpkt
->data
;
450 int buf_size
= avpkt
->size
;
451 int coded_samples
= 0;
452 int decoded_samples
= 0;
455 /* decode the header of the first sub-packet to find out the sample count */
457 OpusPacket
*pkt
= &c
->streams
[0].packet
;
458 ret
= ff_opus_parse_packet(pkt
, buf
, buf_size
, c
->nb_streams
> 1);
460 av_log(avctx
, AV_LOG_ERROR
, "Error parsing the packet header.\n");
463 coded_samples
+= pkt
->frame_count
* pkt
->frame_duration
;
464 c
->streams
[0].silk_samplerate
= get_silk_samplerate(pkt
->config
);
467 frame
->nb_samples
= coded_samples
+ c
->streams
[0].delayed_samples
;
469 /* no input or buffered data => nothing to do */
470 if (!frame
->nb_samples
) {
475 /* setup the data buffers */
476 ret
= ff_get_buffer(avctx
, frame
, 0);
478 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
481 frame
->nb_samples
= 0;
483 for (i
= 0; i
< avctx
->channels
; i
++) {
484 ChannelMap
*map
= &c
->channel_maps
[i
];
486 c
->streams
[map
->stream_idx
].out
[map
->channel_idx
] = (float*)frame
->extended_data
[i
];
489 for (i
= 0; i
< c
->nb_streams
; i
++)
490 c
->streams
[i
].out_size
= frame
->linesize
[0];
492 /* decode each sub-packet */
493 for (i
= 0; i
< c
->nb_streams
; i
++) {
494 OpusStreamContext
*s
= &c
->streams
[i
];
497 ret
= ff_opus_parse_packet(&s
->packet
, buf
, buf_size
, i
!= c
->nb_streams
- 1);
499 av_log(avctx
, AV_LOG_ERROR
, "Error parsing the packet header.\n");
502 if (coded_samples
!= s
->packet
.frame_count
* s
->packet
.frame_duration
) {
503 av_log(avctx
, AV_LOG_ERROR
,
504 "Mismatching coded sample count in substream %d.\n", i
);
505 return AVERROR_INVALIDDATA
;
508 s
->silk_samplerate
= get_silk_samplerate(s
->packet
.config
);
511 ret
= opus_decode_subpacket(&c
->streams
[i
], buf
,
512 s
->packet
.data_size
, coded_samples
);
515 if (decoded_samples
&& ret
!= decoded_samples
) {
516 av_log(avctx
, AV_LOG_ERROR
, "Different numbers of decoded samples "
517 "in a multi-channel stream\n");
518 return AVERROR_INVALIDDATA
;
520 decoded_samples
= ret
;
521 buf
+= s
->packet
.packet_size
;
522 buf_size
-= s
->packet
.packet_size
;
525 for (i
= 0; i
< avctx
->channels
; i
++) {
526 ChannelMap
*map
= &c
->channel_maps
[i
];
528 /* handle copied channels */
530 memcpy(frame
->extended_data
[i
],
531 frame
->extended_data
[map
->copy_idx
],
533 } else if (map
->silence
) {
534 memset(frame
->extended_data
[i
], 0, frame
->linesize
[0]);
538 c
->fdsp
->vector_fmul_scalar((float*)frame
->extended_data
[i
],
539 (float*)frame
->extended_data
[i
],
540 c
->gain
, FFALIGN(decoded_samples
, 8));
544 frame
->nb_samples
= decoded_samples
;
545 *got_frame_ptr
= !!decoded_samples
;
550 static av_cold
void opus_decode_flush(AVCodecContext
*ctx
)
552 OpusContext
*c
= ctx
->priv_data
;
555 for (i
= 0; i
< c
->nb_streams
; i
++) {
556 OpusStreamContext
*s
= &c
->streams
[i
];
558 memset(&s
->packet
, 0, sizeof(s
->packet
));
559 s
->delayed_samples
= 0;
562 av_audio_fifo_drain(s
->celt_delay
, av_audio_fifo_size(s
->celt_delay
));
565 ff_silk_flush(s
->silk
);
566 ff_celt_flush(s
->celt
);
570 static av_cold
int opus_decode_close(AVCodecContext
*avctx
)
572 OpusContext
*c
= avctx
->priv_data
;
575 for (i
= 0; i
< c
->nb_streams
; i
++) {
576 OpusStreamContext
*s
= &c
->streams
[i
];
578 ff_silk_free(&s
->silk
);
579 ff_celt_free(&s
->celt
);
581 av_freep(&s
->out_dummy
);
582 s
->out_dummy_allocated_size
= 0;
584 av_audio_fifo_free(s
->celt_delay
);
588 av_freep(&c
->streams
);
591 av_freep(&c
->channel_maps
);
597 static av_cold
int opus_decode_init(AVCodecContext
*avctx
)
599 OpusContext
*c
= avctx
->priv_data
;
602 avctx
->sample_fmt
= AV_SAMPLE_FMT_FLTP
;
603 avctx
->sample_rate
= 48000;
605 c
->fdsp
= avpriv_float_dsp_alloc(0);
607 return AVERROR(ENOMEM
);
609 /* find out the channel configuration */
610 ret
= ff_opus_parse_extradata(avctx
, c
);
614 /* allocate and init each independent decoder */
615 c
->streams
= av_mallocz_array(c
->nb_streams
, sizeof(*c
->streams
));
618 ret
= AVERROR(ENOMEM
);
622 for (i
= 0; i
< c
->nb_streams
; i
++) {
623 OpusStreamContext
*s
= &c
->streams
[i
];
626 s
->output_channels
= (i
< c
->nb_stereo_streams
) ? 2 : 1;
630 for (j
= 0; j
< s
->output_channels
; j
++) {
631 s
->silk_output
[j
] = s
->silk_buf
[j
];
632 s
->celt_output
[j
] = s
->celt_buf
[j
];
633 s
->redundancy_output
[j
] = s
->redundancy_buf
[j
];
642 layout
= (s
->output_channels
== 1) ? AV_CH_LAYOUT_MONO
: AV_CH_LAYOUT_STEREO
;
643 av_opt_set_int(s
->swr
, "in_sample_fmt", avctx
->sample_fmt
, 0);
644 av_opt_set_int(s
->swr
, "out_sample_fmt", avctx
->sample_fmt
, 0);
645 av_opt_set_int(s
->swr
, "in_channel_layout", layout
, 0);
646 av_opt_set_int(s
->swr
, "out_channel_layout", layout
, 0);
647 av_opt_set_int(s
->swr
, "out_sample_rate", avctx
->sample_rate
, 0);
648 av_opt_set_int(s
->swr
, "filter_size", 16, 0);
650 ret
= ff_silk_init(avctx
, &s
->silk
, s
->output_channels
);
654 ret
= ff_celt_init(avctx
, &s
->celt
, s
->output_channels
);
658 s
->celt_delay
= av_audio_fifo_alloc(avctx
->sample_fmt
,
659 s
->output_channels
, 1024);
660 if (!s
->celt_delay
) {
661 ret
= AVERROR(ENOMEM
);
668 opus_decode_close(avctx
);
672 AVCodec ff_opus_decoder
= {
674 .long_name
= NULL_IF_CONFIG_SMALL("Opus"),
675 .type
= AVMEDIA_TYPE_AUDIO
,
676 .id
= AV_CODEC_ID_OPUS
,
677 .priv_data_size
= sizeof(OpusContext
),
678 .init
= opus_decode_init
,
679 .close
= opus_decode_close
,
680 .decode
= opus_decode_packet
,
681 .flush
= opus_decode_flush
,
682 .capabilities
= CODEC_CAP_DR1
| CODEC_CAP_DELAY
,