| 1 | /* |
| 2 | * Opus encoder using libopus |
| 3 | * Copyright (c) 2012 Nathan Caldwell |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #include <opus.h> |
| 23 | #include <opus_multistream.h> |
| 24 | |
| 25 | #include "libavutil/opt.h" |
| 26 | #include "avcodec.h" |
| 27 | #include "bytestream.h" |
| 28 | #include "internal.h" |
| 29 | #include "libopus.h" |
| 30 | #include "vorbis.h" |
| 31 | #include "audio_frame_queue.h" |
| 32 | |
/* Encoder settings. Some are set directly through the private AVOption
 * table of this encoder, the others are derived during initialization. */
typedef struct LibopusEncOpts {
    int   vbr;            /* 0 = constant bit rate, 1 = VBR, 2 = constrained VBR */
    int   application;    /* one of the OPUS_APPLICATION_* values */
    int   packet_loss;    /* expected packet loss, in percent */
    int   complexity;     /* libopus complexity, taken from compression_level */
    float frame_duration; /* frame duration in milliseconds */
    int   packet_size;    /* samples per channel handed to libopus per packet */
    int   max_bandwidth;  /* OPUS_BANDWIDTH_* value chosen from the cutoff */
} LibopusEncOpts;
| 42 | |
| 43 | typedef struct LibopusEncContext { |
| 44 | AVClass *class; |
| 45 | OpusMSEncoder *enc; |
| 46 | int stream_count; |
| 47 | uint8_t *samples; |
| 48 | LibopusEncOpts opts; |
| 49 | AudioFrameQueue afq; |
| 50 | } LibopusEncContext; |
| 51 | |
/* Number of coupled streams per channel count, indexed by channels - 1. */
static const uint8_t opus_coupled_streams[8] = {
    0, /* 1 channel  */
    1, /* 2 channels */
    1, /* 3 channels */
    2, /* 4 channels */
    2, /* 5 channels */
    2, /* 6 channels */
    2, /* 7 channels */
    3, /* 8 channels */
};
| 55 | |
/* Opus internal to Vorbis channel order mapping written in the header.
 * Rows are indexed by channels - 1; unused trailing entries are zero. */
static const uint8_t opus_vorbis_channel_map[8][8] = {
    [0] = { 0 },
    [1] = { 0, 1 },
    [2] = { 0, 2, 1 },
    [3] = { 0, 1, 2, 3 },
    [4] = { 0, 4, 1, 2, 3 },
    [5] = { 0, 4, 1, 2, 3, 5 },
    [6] = { 0, 4, 1, 2, 3, 5, 6 },
    [7] = { 0, 6, 1, 2, 3, 4, 5, 7 },
};
| 67 | |
/* libavcodec to libopus channel order mapping, passed to libopus.
 * Rows are indexed by channels - 1; unused trailing entries are zero. */
static const uint8_t libavcodec_libopus_channel_map[8][8] = {
    [0] = { 0 },
    [1] = { 0, 1 },
    [2] = { 0, 1, 2 },
    [3] = { 0, 1, 2, 3 },
    [4] = { 0, 1, 3, 4, 2 },
    [5] = { 0, 1, 4, 5, 2, 3 },
    [6] = { 0, 1, 5, 6, 2, 4, 3 },
    [7] = { 0, 1, 6, 7, 4, 5, 2, 3 },
};
| 79 | |
| 80 | static void libopus_write_header(AVCodecContext *avctx, int stream_count, |
| 81 | int coupled_stream_count, |
| 82 | const uint8_t *channel_mapping) |
| 83 | { |
| 84 | uint8_t *p = avctx->extradata; |
| 85 | int channels = avctx->channels; |
| 86 | |
| 87 | bytestream_put_buffer(&p, "OpusHead", 8); |
| 88 | bytestream_put_byte(&p, 1); /* Version */ |
| 89 | bytestream_put_byte(&p, channels); |
| 90 | bytestream_put_le16(&p, avctx->delay); /* Lookahead samples at 48kHz */ |
| 91 | bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */ |
| 92 | bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */ |
| 93 | |
| 94 | /* Channel mapping */ |
| 95 | if (channels > 2) { |
| 96 | bytestream_put_byte(&p, channels <= 8 ? 1 : 255); |
| 97 | bytestream_put_byte(&p, stream_count); |
| 98 | bytestream_put_byte(&p, coupled_stream_count); |
| 99 | bytestream_put_buffer(&p, channel_mapping, channels); |
| 100 | } else { |
| 101 | bytestream_put_byte(&p, 0); |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc, |
| 106 | LibopusEncOpts *opts) |
| 107 | { |
| 108 | int ret; |
| 109 | |
| 110 | if (avctx->global_quality) { |
| 111 | av_log(avctx, AV_LOG_ERROR, |
| 112 | "Quality-based encoding not supported, " |
| 113 | "please specify a bitrate and VBR setting.\n"); |
| 114 | return AVERROR(EINVAL); |
| 115 | } |
| 116 | |
| 117 | ret = opus_multistream_encoder_ctl(enc, OPUS_SET_BITRATE(avctx->bit_rate)); |
| 118 | if (ret != OPUS_OK) { |
| 119 | av_log(avctx, AV_LOG_ERROR, |
| 120 | "Failed to set bitrate: %s\n", opus_strerror(ret)); |
| 121 | return ret; |
| 122 | } |
| 123 | |
| 124 | ret = opus_multistream_encoder_ctl(enc, |
| 125 | OPUS_SET_COMPLEXITY(opts->complexity)); |
| 126 | if (ret != OPUS_OK) |
| 127 | av_log(avctx, AV_LOG_WARNING, |
| 128 | "Unable to set complexity: %s\n", opus_strerror(ret)); |
| 129 | |
| 130 | ret = opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(!!opts->vbr)); |
| 131 | if (ret != OPUS_OK) |
| 132 | av_log(avctx, AV_LOG_WARNING, |
| 133 | "Unable to set VBR: %s\n", opus_strerror(ret)); |
| 134 | |
| 135 | ret = opus_multistream_encoder_ctl(enc, |
| 136 | OPUS_SET_VBR_CONSTRAINT(opts->vbr == 2)); |
| 137 | if (ret != OPUS_OK) |
| 138 | av_log(avctx, AV_LOG_WARNING, |
| 139 | "Unable to set constrained VBR: %s\n", opus_strerror(ret)); |
| 140 | |
| 141 | ret = opus_multistream_encoder_ctl(enc, |
| 142 | OPUS_SET_PACKET_LOSS_PERC(opts->packet_loss)); |
| 143 | if (ret != OPUS_OK) |
| 144 | av_log(avctx, AV_LOG_WARNING, |
| 145 | "Unable to set expected packet loss percentage: %s\n", |
| 146 | opus_strerror(ret)); |
| 147 | |
| 148 | if (avctx->cutoff) { |
| 149 | ret = opus_multistream_encoder_ctl(enc, |
| 150 | OPUS_SET_MAX_BANDWIDTH(opts->max_bandwidth)); |
| 151 | if (ret != OPUS_OK) |
| 152 | av_log(avctx, AV_LOG_WARNING, |
| 153 | "Unable to set maximum bandwidth: %s\n", opus_strerror(ret)); |
| 154 | } |
| 155 | |
| 156 | return OPUS_OK; |
| 157 | } |
| 158 | |
| 159 | static av_cold int libopus_encode_init(AVCodecContext *avctx) |
| 160 | { |
| 161 | LibopusEncContext *opus = avctx->priv_data; |
| 162 | const uint8_t *channel_mapping; |
| 163 | OpusMSEncoder *enc; |
| 164 | int ret = OPUS_OK; |
| 165 | int coupled_stream_count, header_size, frame_size; |
| 166 | |
| 167 | coupled_stream_count = opus_coupled_streams[avctx->channels - 1]; |
| 168 | opus->stream_count = avctx->channels - coupled_stream_count; |
| 169 | channel_mapping = libavcodec_libopus_channel_map[avctx->channels - 1]; |
| 170 | |
| 171 | /* FIXME: Opus can handle up to 255 channels. However, the mapping for |
| 172 | * anything greater than 8 is undefined. */ |
| 173 | if (avctx->channels > 8) |
| 174 | av_log(avctx, AV_LOG_WARNING, |
| 175 | "Channel layout undefined for %d channels.\n", avctx->channels); |
| 176 | |
| 177 | if (!avctx->bit_rate) { |
| 178 | /* Sane default copied from opusenc */ |
| 179 | avctx->bit_rate = 64000 * opus->stream_count + |
| 180 | 32000 * coupled_stream_count; |
| 181 | av_log(avctx, AV_LOG_WARNING, |
| 182 | "No bit rate set. Defaulting to %d bps.\n", avctx->bit_rate); |
| 183 | } |
| 184 | |
| 185 | if (avctx->bit_rate < 500 || avctx->bit_rate > 256000 * avctx->channels) { |
| 186 | av_log(avctx, AV_LOG_ERROR, "The bit rate %d bps is unsupported. " |
| 187 | "Please choose a value between 500 and %d.\n", avctx->bit_rate, |
| 188 | 256000 * avctx->channels); |
| 189 | return AVERROR(EINVAL); |
| 190 | } |
| 191 | |
| 192 | frame_size = opus->opts.frame_duration * 48000 / 1000; |
| 193 | switch (frame_size) { |
| 194 | case 120: |
| 195 | case 240: |
| 196 | if (opus->opts.application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) |
| 197 | av_log(avctx, AV_LOG_WARNING, |
| 198 | "LPC mode cannot be used with a frame duration of less " |
| 199 | "than 10ms. Enabling restricted low-delay mode.\n" |
| 200 | "Use a longer frame duration if this is not what you want.\n"); |
| 201 | /* Frame sizes less than 10 ms can only use MDCT mode, so switching to |
| 202 | * RESTRICTED_LOWDELAY avoids an unnecessary extra 2.5ms lookahead. */ |
| 203 | opus->opts.application = OPUS_APPLICATION_RESTRICTED_LOWDELAY; |
| 204 | case 480: |
| 205 | case 960: |
| 206 | case 1920: |
| 207 | case 2880: |
| 208 | opus->opts.packet_size = |
| 209 | avctx->frame_size = frame_size * avctx->sample_rate / 48000; |
| 210 | break; |
| 211 | default: |
| 212 | av_log(avctx, AV_LOG_ERROR, "Invalid frame duration: %g.\n" |
| 213 | "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40 or 60.\n", |
| 214 | opus->opts.frame_duration); |
| 215 | return AVERROR(EINVAL); |
| 216 | } |
| 217 | |
| 218 | if (avctx->compression_level < 0 || avctx->compression_level > 10) { |
| 219 | av_log(avctx, AV_LOG_WARNING, |
| 220 | "Compression level must be in the range 0 to 10. " |
| 221 | "Defaulting to 10.\n"); |
| 222 | opus->opts.complexity = 10; |
| 223 | } else { |
| 224 | opus->opts.complexity = avctx->compression_level; |
| 225 | } |
| 226 | |
| 227 | if (avctx->cutoff) { |
| 228 | switch (avctx->cutoff) { |
| 229 | case 4000: |
| 230 | opus->opts.max_bandwidth = OPUS_BANDWIDTH_NARROWBAND; |
| 231 | break; |
| 232 | case 6000: |
| 233 | opus->opts.max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; |
| 234 | break; |
| 235 | case 8000: |
| 236 | opus->opts.max_bandwidth = OPUS_BANDWIDTH_WIDEBAND; |
| 237 | break; |
| 238 | case 12000: |
| 239 | opus->opts.max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; |
| 240 | break; |
| 241 | case 20000: |
| 242 | opus->opts.max_bandwidth = OPUS_BANDWIDTH_FULLBAND; |
| 243 | break; |
| 244 | default: |
| 245 | av_log(avctx, AV_LOG_WARNING, |
| 246 | "Invalid frequency cutoff: %d. Using default maximum bandwidth.\n" |
| 247 | "Cutoff frequency must be exactly one of: 4000, 6000, 8000, 12000 or 20000.\n", |
| 248 | avctx->cutoff); |
| 249 | avctx->cutoff = 0; |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | enc = opus_multistream_encoder_create(avctx->sample_rate, avctx->channels, |
| 254 | opus->stream_count, |
| 255 | coupled_stream_count, |
| 256 | channel_mapping, |
| 257 | opus->opts.application, &ret); |
| 258 | if (ret != OPUS_OK) { |
| 259 | av_log(avctx, AV_LOG_ERROR, |
| 260 | "Failed to create encoder: %s\n", opus_strerror(ret)); |
| 261 | return ff_opus_error_to_averror(ret); |
| 262 | } |
| 263 | |
| 264 | ret = libopus_configure_encoder(avctx, enc, &opus->opts); |
| 265 | if (ret != OPUS_OK) { |
| 266 | ret = ff_opus_error_to_averror(ret); |
| 267 | goto fail; |
| 268 | } |
| 269 | |
| 270 | header_size = 19 + (avctx->channels > 2 ? 2 + avctx->channels : 0); |
| 271 | avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE); |
| 272 | if (!avctx->extradata) { |
| 273 | av_log(avctx, AV_LOG_ERROR, "Failed to allocate extradata.\n"); |
| 274 | ret = AVERROR(ENOMEM); |
| 275 | goto fail; |
| 276 | } |
| 277 | avctx->extradata_size = header_size; |
| 278 | |
| 279 | opus->samples = av_mallocz(frame_size * avctx->channels * |
| 280 | av_get_bytes_per_sample(avctx->sample_fmt)); |
| 281 | if (!opus->samples) { |
| 282 | av_log(avctx, AV_LOG_ERROR, "Failed to allocate samples buffer.\n"); |
| 283 | ret = AVERROR(ENOMEM); |
| 284 | goto fail; |
| 285 | } |
| 286 | |
| 287 | ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->delay)); |
| 288 | if (ret != OPUS_OK) |
| 289 | av_log(avctx, AV_LOG_WARNING, |
| 290 | "Unable to get number of lookahead samples: %s\n", |
| 291 | opus_strerror(ret)); |
| 292 | |
| 293 | libopus_write_header(avctx, opus->stream_count, coupled_stream_count, |
| 294 | opus_vorbis_channel_map[avctx->channels - 1]); |
| 295 | |
| 296 | ff_af_queue_init(avctx, &opus->afq); |
| 297 | |
| 298 | opus->enc = enc; |
| 299 | |
| 300 | return 0; |
| 301 | |
| 302 | fail: |
| 303 | opus_multistream_encoder_destroy(enc); |
| 304 | av_freep(&avctx->extradata); |
| 305 | return ret; |
| 306 | } |
| 307 | |
| 308 | static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt, |
| 309 | const AVFrame *frame, int *got_packet_ptr) |
| 310 | { |
| 311 | LibopusEncContext *opus = avctx->priv_data; |
| 312 | const int sample_size = avctx->channels * |
| 313 | av_get_bytes_per_sample(avctx->sample_fmt); |
| 314 | uint8_t *audio; |
| 315 | int ret; |
| 316 | int discard_padding; |
| 317 | |
| 318 | if (frame) { |
| 319 | ff_af_queue_add(&opus->afq, frame); |
| 320 | if (frame->nb_samples < opus->opts.packet_size) { |
| 321 | audio = opus->samples; |
| 322 | memcpy(audio, frame->data[0], frame->nb_samples * sample_size); |
| 323 | } else |
| 324 | audio = frame->data[0]; |
| 325 | } else { |
| 326 | if (!opus->afq.remaining_samples) |
| 327 | return 0; |
| 328 | audio = opus->samples; |
| 329 | memset(audio, 0, opus->opts.packet_size * sample_size); |
| 330 | } |
| 331 | |
| 332 | /* Maximum packet size taken from opusenc in opus-tools. 60ms packets |
| 333 | * consist of 3 frames in one packet. The maximum frame size is 1275 |
| 334 | * bytes along with the largest possible packet header of 7 bytes. */ |
| 335 | if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count)) < 0) |
| 336 | return ret; |
| 337 | |
| 338 | if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) |
| 339 | ret = opus_multistream_encode_float(opus->enc, (float *)audio, |
| 340 | opus->opts.packet_size, |
| 341 | avpkt->data, avpkt->size); |
| 342 | else |
| 343 | ret = opus_multistream_encode(opus->enc, (opus_int16 *)audio, |
| 344 | opus->opts.packet_size, |
| 345 | avpkt->data, avpkt->size); |
| 346 | |
| 347 | if (ret < 0) { |
| 348 | av_log(avctx, AV_LOG_ERROR, |
| 349 | "Error encoding frame: %s\n", opus_strerror(ret)); |
| 350 | return ff_opus_error_to_averror(ret); |
| 351 | } |
| 352 | |
| 353 | av_shrink_packet(avpkt, ret); |
| 354 | |
| 355 | ff_af_queue_remove(&opus->afq, opus->opts.packet_size, |
| 356 | &avpkt->pts, &avpkt->duration); |
| 357 | |
| 358 | discard_padding = opus->opts.packet_size - avpkt->duration; |
| 359 | // Check if subtraction resulted in an overflow |
| 360 | if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0)) { |
| 361 | av_free_packet(avpkt); |
| 362 | av_free(avpkt); |
| 363 | return AVERROR(EINVAL); |
| 364 | } |
| 365 | if (discard_padding > 0) { |
| 366 | uint8_t* side_data = av_packet_new_side_data(avpkt, |
| 367 | AV_PKT_DATA_SKIP_SAMPLES, |
| 368 | 10); |
| 369 | if(!side_data) { |
| 370 | av_free_packet(avpkt); |
| 371 | av_free(avpkt); |
| 372 | return AVERROR(ENOMEM); |
| 373 | } |
| 374 | AV_WL32(side_data + 4, discard_padding); |
| 375 | } |
| 376 | |
| 377 | *got_packet_ptr = 1; |
| 378 | |
| 379 | return 0; |
| 380 | } |
| 381 | |
| 382 | static av_cold int libopus_encode_close(AVCodecContext *avctx) |
| 383 | { |
| 384 | LibopusEncContext *opus = avctx->priv_data; |
| 385 | |
| 386 | opus_multistream_encoder_destroy(opus->enc); |
| 387 | |
| 388 | ff_af_queue_close(&opus->afq); |
| 389 | |
| 390 | av_freep(&opus->samples); |
| 391 | av_freep(&avctx->extradata); |
| 392 | |
| 393 | return 0; |
| 394 | } |
| 395 | |
| 396 | #define OFFSET(x) offsetof(LibopusEncContext, opts.x) |
| 397 | #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
| 398 | static const AVOption libopus_options[] = { |
| 399 | { "application", "Intended application type", OFFSET(application), AV_OPT_TYPE_INT, { .i64 = OPUS_APPLICATION_AUDIO }, OPUS_APPLICATION_VOIP, OPUS_APPLICATION_RESTRICTED_LOWDELAY, FLAGS, "application" }, |
| 400 | { "voip", "Favor improved speech intelligibility", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_VOIP }, 0, 0, FLAGS, "application" }, |
| 401 | { "audio", "Favor faithfulness to the input", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_AUDIO }, 0, 0, FLAGS, "application" }, |
| 402 | { "lowdelay", "Restrict to only the lowest delay modes", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_RESTRICTED_LOWDELAY }, 0, 0, FLAGS, "application" }, |
| 403 | { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 60.0, FLAGS }, |
| 404 | { "packet_loss", "Expected packet loss percentage", OFFSET(packet_loss), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, FLAGS }, |
| 405 | { "vbr", "Variable bit rate mode", OFFSET(vbr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, FLAGS, "vbr" }, |
| 406 | { "off", "Use constant bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "vbr" }, |
| 407 | { "on", "Use variable bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "vbr" }, |
| 408 | { "constrained", "Use constrained VBR", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "vbr" }, |
| 409 | { NULL }, |
| 410 | }; |
| 411 | |
| 412 | static const AVClass libopus_class = { |
| 413 | .class_name = "libopus", |
| 414 | .item_name = av_default_item_name, |
| 415 | .option = libopus_options, |
| 416 | .version = LIBAVUTIL_VERSION_INT, |
| 417 | }; |
| 418 | |
| 419 | static const AVCodecDefault libopus_defaults[] = { |
| 420 | { "b", "0" }, |
| 421 | { "compression_level", "10" }, |
| 422 | { NULL }, |
| 423 | }; |
| 424 | |
/* Sample rates offered by this encoder; the list is terminated by 0. */
static const int libopus_sample_rates[] = {
    48000,
    24000,
    16000,
    12000,
    8000,
    0,
};
| 428 | |
| 429 | AVCodec ff_libopus_encoder = { |
| 430 | .name = "libopus", |
| 431 | .long_name = NULL_IF_CONFIG_SMALL("libopus Opus"), |
| 432 | .type = AVMEDIA_TYPE_AUDIO, |
| 433 | .id = AV_CODEC_ID_OPUS, |
| 434 | .priv_data_size = sizeof(LibopusEncContext), |
| 435 | .init = libopus_encode_init, |
| 436 | .encode2 = libopus_encode, |
| 437 | .close = libopus_encode_close, |
| 438 | .capabilities = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME, |
| 439 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, |
| 440 | AV_SAMPLE_FMT_FLT, |
| 441 | AV_SAMPLE_FMT_NONE }, |
| 442 | .channel_layouts = ff_vorbis_channel_layouts, |
| 443 | .supported_samplerates = libopus_sample_rates, |
| 444 | .priv_class = &libopus_class, |
| 445 | .defaults = libopus_defaults, |
| 446 | }; |