| 1 | /* |
| 2 | * This file is part of FFmpeg. |
| 3 | * |
| 4 | * FFmpeg is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU Lesser General Public |
| 6 | * License as published by the Free Software Foundation; either |
| 7 | * version 2.1 of the License, or (at your option) any later version. |
| 8 | * |
| 9 | * FFmpeg is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | * Lesser General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU Lesser General Public |
| 15 | * License along with FFmpeg; if not, write to the Free Software |
| 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | /** |
| 20 | * @file |
| 21 | * simple audio converter |
| 22 | * |
| 23 | * @example transcode_aac.c |
| 24 | * Convert an input audio file to AAC in an MP4 container using FFmpeg. |
| 25 | * @author Andreas Unterweger (dustsigns@gmail.com) |
| 26 | */ |
| 27 | |
| 28 | #include <stdio.h> |
| 29 | |
| 30 | #include "libavformat/avformat.h" |
| 31 | #include "libavformat/avio.h" |
| 32 | |
| 33 | #include "libavcodec/avcodec.h" |
| 34 | |
| 35 | #include "libavutil/audio_fifo.h" |
| 36 | #include "libavutil/avassert.h" |
| 37 | #include "libavutil/avstring.h" |
| 38 | #include "libavutil/frame.h" |
| 39 | #include "libavutil/opt.h" |
| 40 | |
| 41 | #include "libswresample/swresample.h" |
| 42 | |
/**
 * The output bit rate in bit/s.
 * The value is assigned unscaled to the encoder context's bit_rate field.
 */
#define OUTPUT_BIT_RATE 48000
/** The number of output channels */
#define OUTPUT_CHANNELS 2
/** The audio sample output format */
#define OUTPUT_SAMPLE_FORMAT AV_SAMPLE_FMT_S16
| 49 | |
/**
 * Translate an error code into a human-readable message.
 * The text is stored in a function-local static buffer which is overwritten
 * by every call, so the returned pointer is only valid until the next call.
 * @param error Error code to be translated
 * @return Pointer to the static message buffer (not thread-safe)
 */
static const char *get_error_text(const int error)
{
    static char message[255];

    av_strerror(error, message, sizeof(message));
    return message;
}
| 61 | |
| 62 | /** Open an input file and the required decoder. */ |
| 63 | static int open_input_file(const char *filename, |
| 64 | AVFormatContext **input_format_context, |
| 65 | AVCodecContext **input_codec_context) |
| 66 | { |
| 67 | AVCodec *input_codec; |
| 68 | int error; |
| 69 | |
| 70 | /** Open the input file to read from it. */ |
| 71 | if ((error = avformat_open_input(input_format_context, filename, NULL, |
| 72 | NULL)) < 0) { |
| 73 | fprintf(stderr, "Could not open input file '%s' (error '%s')\n", |
| 74 | filename, get_error_text(error)); |
| 75 | *input_format_context = NULL; |
| 76 | return error; |
| 77 | } |
| 78 | |
| 79 | /** Get information on the input file (number of streams etc.). */ |
| 80 | if ((error = avformat_find_stream_info(*input_format_context, NULL)) < 0) { |
| 81 | fprintf(stderr, "Could not open find stream info (error '%s')\n", |
| 82 | get_error_text(error)); |
| 83 | avformat_close_input(input_format_context); |
| 84 | return error; |
| 85 | } |
| 86 | |
| 87 | /** Make sure that there is only one stream in the input file. */ |
| 88 | if ((*input_format_context)->nb_streams != 1) { |
| 89 | fprintf(stderr, "Expected one audio input stream, but found %d\n", |
| 90 | (*input_format_context)->nb_streams); |
| 91 | avformat_close_input(input_format_context); |
| 92 | return AVERROR_EXIT; |
| 93 | } |
| 94 | |
| 95 | /** Find a decoder for the audio stream. */ |
| 96 | if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codec->codec_id))) { |
| 97 | fprintf(stderr, "Could not find input codec\n"); |
| 98 | avformat_close_input(input_format_context); |
| 99 | return AVERROR_EXIT; |
| 100 | } |
| 101 | |
| 102 | /** Open the decoder for the audio stream to use it later. */ |
| 103 | if ((error = avcodec_open2((*input_format_context)->streams[0]->codec, |
| 104 | input_codec, NULL)) < 0) { |
| 105 | fprintf(stderr, "Could not open input codec (error '%s')\n", |
| 106 | get_error_text(error)); |
| 107 | avformat_close_input(input_format_context); |
| 108 | return error; |
| 109 | } |
| 110 | |
| 111 | /** Save the decoder context for easier access later. */ |
| 112 | *input_codec_context = (*input_format_context)->streams[0]->codec; |
| 113 | |
| 114 | return 0; |
| 115 | } |
| 116 | |
| 117 | /** |
| 118 | * Open an output file and the required encoder. |
| 119 | * Also set some basic encoder parameters. |
| 120 | * Some of these parameters are based on the input file's parameters. |
| 121 | */ |
| 122 | static int open_output_file(const char *filename, |
| 123 | AVCodecContext *input_codec_context, |
| 124 | AVFormatContext **output_format_context, |
| 125 | AVCodecContext **output_codec_context) |
| 126 | { |
| 127 | AVIOContext *output_io_context = NULL; |
| 128 | AVStream *stream = NULL; |
| 129 | AVCodec *output_codec = NULL; |
| 130 | int error; |
| 131 | |
| 132 | /** Open the output file to write to it. */ |
| 133 | if ((error = avio_open(&output_io_context, filename, |
| 134 | AVIO_FLAG_WRITE)) < 0) { |
| 135 | fprintf(stderr, "Could not open output file '%s' (error '%s')\n", |
| 136 | filename, get_error_text(error)); |
| 137 | return error; |
| 138 | } |
| 139 | |
| 140 | /** Create a new format context for the output container format. */ |
| 141 | if (!(*output_format_context = avformat_alloc_context())) { |
| 142 | fprintf(stderr, "Could not allocate output format context\n"); |
| 143 | return AVERROR(ENOMEM); |
| 144 | } |
| 145 | |
| 146 | /** Associate the output file (pointer) with the container format context. */ |
| 147 | (*output_format_context)->pb = output_io_context; |
| 148 | |
| 149 | /** Guess the desired container format based on the file extension. */ |
| 150 | if (!((*output_format_context)->oformat = av_guess_format(NULL, filename, |
| 151 | NULL))) { |
| 152 | fprintf(stderr, "Could not find output file format\n"); |
| 153 | goto cleanup; |
| 154 | } |
| 155 | |
| 156 | av_strlcpy((*output_format_context)->filename, filename, |
| 157 | sizeof((*output_format_context)->filename)); |
| 158 | |
| 159 | /** Find the encoder to be used by its name. */ |
| 160 | if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) { |
| 161 | fprintf(stderr, "Could not find an AAC encoder.\n"); |
| 162 | goto cleanup; |
| 163 | } |
| 164 | |
| 165 | /** Create a new audio stream in the output file container. */ |
| 166 | if (!(stream = avformat_new_stream(*output_format_context, output_codec))) { |
| 167 | fprintf(stderr, "Could not create new stream\n"); |
| 168 | error = AVERROR(ENOMEM); |
| 169 | goto cleanup; |
| 170 | } |
| 171 | |
| 172 | /** Save the encoder context for easiert access later. */ |
| 173 | *output_codec_context = stream->codec; |
| 174 | |
| 175 | /** |
| 176 | * Set the basic encoder parameters. |
| 177 | * The input file's sample rate is used to avoid a sample rate conversion. |
| 178 | */ |
| 179 | (*output_codec_context)->channels = OUTPUT_CHANNELS; |
| 180 | (*output_codec_context)->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS); |
| 181 | (*output_codec_context)->sample_rate = input_codec_context->sample_rate; |
| 182 | (*output_codec_context)->sample_fmt = AV_SAMPLE_FMT_S16; |
| 183 | (*output_codec_context)->bit_rate = OUTPUT_BIT_RATE; |
| 184 | |
| 185 | /** |
| 186 | * Some container formats (like MP4) require global headers to be present |
| 187 | * Mark the encoder so that it behaves accordingly. |
| 188 | */ |
| 189 | if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER) |
| 190 | (*output_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER; |
| 191 | |
| 192 | /** Open the encoder for the audio stream to use it later. */ |
| 193 | if ((error = avcodec_open2(*output_codec_context, output_codec, NULL)) < 0) { |
| 194 | fprintf(stderr, "Could not open output codec (error '%s')\n", |
| 195 | get_error_text(error)); |
| 196 | goto cleanup; |
| 197 | } |
| 198 | |
| 199 | return 0; |
| 200 | |
| 201 | cleanup: |
| 202 | avio_close((*output_format_context)->pb); |
| 203 | avformat_free_context(*output_format_context); |
| 204 | *output_format_context = NULL; |
| 205 | return error < 0 ? error : AVERROR_EXIT; |
| 206 | } |
| 207 | |
| 208 | /** Initialize one data packet for reading or writing. */ |
| 209 | static void init_packet(AVPacket *packet) |
| 210 | { |
| 211 | av_init_packet(packet); |
| 212 | /** Set the packet data and size so that it is recognized as being empty. */ |
| 213 | packet->data = NULL; |
| 214 | packet->size = 0; |
| 215 | } |
| 216 | |
| 217 | /** Initialize one audio frame for reading from the input file */ |
| 218 | static int init_input_frame(AVFrame **frame) |
| 219 | { |
| 220 | if (!(*frame = av_frame_alloc())) { |
| 221 | fprintf(stderr, "Could not allocate input frame\n"); |
| 222 | return AVERROR(ENOMEM); |
| 223 | } |
| 224 | return 0; |
| 225 | } |
| 226 | |
| 227 | /** |
| 228 | * Initialize the audio resampler based on the input and output codec settings. |
| 229 | * If the input and output sample formats differ, a conversion is required |
| 230 | * libswresample takes care of this, but requires initialization. |
| 231 | */ |
| 232 | static int init_resampler(AVCodecContext *input_codec_context, |
| 233 | AVCodecContext *output_codec_context, |
| 234 | SwrContext **resample_context) |
| 235 | { |
| 236 | int error; |
| 237 | |
| 238 | /** |
| 239 | * Create a resampler context for the conversion. |
| 240 | * Set the conversion parameters. |
| 241 | * Default channel layouts based on the number of channels |
| 242 | * are assumed for simplicity (they are sometimes not detected |
| 243 | * properly by the demuxer and/or decoder). |
| 244 | */ |
| 245 | *resample_context = swr_alloc_set_opts(NULL, |
| 246 | av_get_default_channel_layout(output_codec_context->channels), |
| 247 | output_codec_context->sample_fmt, |
| 248 | output_codec_context->sample_rate, |
| 249 | av_get_default_channel_layout(input_codec_context->channels), |
| 250 | input_codec_context->sample_fmt, |
| 251 | input_codec_context->sample_rate, |
| 252 | 0, NULL); |
| 253 | if (!*resample_context) { |
| 254 | fprintf(stderr, "Could not allocate resample context\n"); |
| 255 | return AVERROR(ENOMEM); |
| 256 | } |
| 257 | /** |
| 258 | * Perform a sanity check so that the number of converted samples is |
| 259 | * not greater than the number of samples to be converted. |
| 260 | * If the sample rates differ, this case has to be handled differently |
| 261 | */ |
| 262 | av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate); |
| 263 | |
| 264 | /** Open the resampler with the specified parameters. */ |
| 265 | if ((error = swr_init(*resample_context)) < 0) { |
| 266 | fprintf(stderr, "Could not open resample context\n"); |
| 267 | swr_free(resample_context); |
| 268 | return error; |
| 269 | } |
| 270 | return 0; |
| 271 | } |
| 272 | |
| 273 | /** Initialize a FIFO buffer for the audio samples to be encoded. */ |
| 274 | static int init_fifo(AVAudioFifo **fifo) |
| 275 | { |
| 276 | /** Create the FIFO buffer based on the specified output sample format. */ |
| 277 | if (!(*fifo = av_audio_fifo_alloc(OUTPUT_SAMPLE_FORMAT, OUTPUT_CHANNELS, 1))) { |
| 278 | fprintf(stderr, "Could not allocate FIFO\n"); |
| 279 | return AVERROR(ENOMEM); |
| 280 | } |
| 281 | return 0; |
| 282 | } |
| 283 | |
| 284 | /** Write the header of the output file container. */ |
| 285 | static int write_output_file_header(AVFormatContext *output_format_context) |
| 286 | { |
| 287 | int error; |
| 288 | if ((error = avformat_write_header(output_format_context, NULL)) < 0) { |
| 289 | fprintf(stderr, "Could not write output file header (error '%s')\n", |
| 290 | get_error_text(error)); |
| 291 | return error; |
| 292 | } |
| 293 | return 0; |
| 294 | } |
| 295 | |
| 296 | /** Decode one audio frame from the input file. */ |
| 297 | static int decode_audio_frame(AVFrame *frame, |
| 298 | AVFormatContext *input_format_context, |
| 299 | AVCodecContext *input_codec_context, |
| 300 | int *data_present, int *finished) |
| 301 | { |
| 302 | /** Packet used for temporary storage. */ |
| 303 | AVPacket input_packet; |
| 304 | int error; |
| 305 | init_packet(&input_packet); |
| 306 | |
| 307 | /** Read one audio frame from the input file into a temporary packet. */ |
| 308 | if ((error = av_read_frame(input_format_context, &input_packet)) < 0) { |
| 309 | /** If we are the the end of the file, flush the decoder below. */ |
| 310 | if (error == AVERROR_EOF) |
| 311 | *finished = 1; |
| 312 | else { |
| 313 | fprintf(stderr, "Could not read frame (error '%s')\n", |
| 314 | get_error_text(error)); |
| 315 | return error; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | /** |
| 320 | * Decode the audio frame stored in the temporary packet. |
| 321 | * The input audio stream decoder is used to do this. |
| 322 | * If we are at the end of the file, pass an empty packet to the decoder |
| 323 | * to flush it. |
| 324 | */ |
| 325 | if ((error = avcodec_decode_audio4(input_codec_context, frame, |
| 326 | data_present, &input_packet)) < 0) { |
| 327 | fprintf(stderr, "Could not decode frame (error '%s')\n", |
| 328 | get_error_text(error)); |
| 329 | av_free_packet(&input_packet); |
| 330 | return error; |
| 331 | } |
| 332 | |
| 333 | /** |
| 334 | * If the decoder has not been flushed completely, we are not finished, |
| 335 | * so that this function has to be called again. |
| 336 | */ |
| 337 | if (*finished && *data_present) |
| 338 | *finished = 0; |
| 339 | av_free_packet(&input_packet); |
| 340 | return 0; |
| 341 | } |
| 342 | |
| 343 | /** |
| 344 | * Initialize a temporary storage for the specified number of audio samples. |
| 345 | * The conversion requires temporary storage due to the different format. |
| 346 | * The number of audio samples to be allocated is specified in frame_size. |
| 347 | */ |
| 348 | static int init_converted_samples(uint8_t ***converted_input_samples, |
| 349 | AVCodecContext *output_codec_context, |
| 350 | int frame_size) |
| 351 | { |
| 352 | int error; |
| 353 | |
| 354 | /** |
| 355 | * Allocate as many pointers as there are audio channels. |
| 356 | * Each pointer will later point to the audio samples of the corresponding |
| 357 | * channels (although it may be NULL for interleaved formats). |
| 358 | */ |
| 359 | if (!(*converted_input_samples = calloc(output_codec_context->channels, |
| 360 | sizeof(**converted_input_samples)))) { |
| 361 | fprintf(stderr, "Could not allocate converted input sample pointers\n"); |
| 362 | return AVERROR(ENOMEM); |
| 363 | } |
| 364 | |
| 365 | /** |
| 366 | * Allocate memory for the samples of all channels in one consecutive |
| 367 | * block for convenience. |
| 368 | */ |
| 369 | if ((error = av_samples_alloc(*converted_input_samples, NULL, |
| 370 | output_codec_context->channels, |
| 371 | frame_size, |
| 372 | output_codec_context->sample_fmt, 0)) < 0) { |
| 373 | fprintf(stderr, |
| 374 | "Could not allocate converted input samples (error '%s')\n", |
| 375 | get_error_text(error)); |
| 376 | av_freep(&(*converted_input_samples)[0]); |
| 377 | free(*converted_input_samples); |
| 378 | return error; |
| 379 | } |
| 380 | return 0; |
| 381 | } |
| 382 | |
| 383 | /** |
| 384 | * Convert the input audio samples into the output sample format. |
| 385 | * The conversion happens on a per-frame basis, the size of which is specified |
| 386 | * by frame_size. |
| 387 | */ |
| 388 | static int convert_samples(const uint8_t **input_data, |
| 389 | uint8_t **converted_data, const int frame_size, |
| 390 | SwrContext *resample_context) |
| 391 | { |
| 392 | int error; |
| 393 | |
| 394 | /** Convert the samples using the resampler. */ |
| 395 | if ((error = swr_convert(resample_context, |
| 396 | converted_data, frame_size, |
| 397 | input_data , frame_size)) < 0) { |
| 398 | fprintf(stderr, "Could not convert input samples (error '%s')\n", |
| 399 | get_error_text(error)); |
| 400 | return error; |
| 401 | } |
| 402 | |
| 403 | return 0; |
| 404 | } |
| 405 | |
| 406 | /** Add converted input audio samples to the FIFO buffer for later processing. */ |
| 407 | static int add_samples_to_fifo(AVAudioFifo *fifo, |
| 408 | uint8_t **converted_input_samples, |
| 409 | const int frame_size) |
| 410 | { |
| 411 | int error; |
| 412 | |
| 413 | /** |
| 414 | * Make the FIFO as large as it needs to be to hold both, |
| 415 | * the old and the new samples. |
| 416 | */ |
| 417 | if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) { |
| 418 | fprintf(stderr, "Could not reallocate FIFO\n"); |
| 419 | return error; |
| 420 | } |
| 421 | |
| 422 | /** Store the new samples in the FIFO buffer. */ |
| 423 | if (av_audio_fifo_write(fifo, (void **)converted_input_samples, |
| 424 | frame_size) < frame_size) { |
| 425 | fprintf(stderr, "Could not write data to FIFO\n"); |
| 426 | return AVERROR_EXIT; |
| 427 | } |
| 428 | return 0; |
| 429 | } |
| 430 | |
| 431 | /** |
| 432 | * Read one audio frame from the input file, decodes, converts and stores |
| 433 | * it in the FIFO buffer. |
| 434 | */ |
| 435 | static int read_decode_convert_and_store(AVAudioFifo *fifo, |
| 436 | AVFormatContext *input_format_context, |
| 437 | AVCodecContext *input_codec_context, |
| 438 | AVCodecContext *output_codec_context, |
| 439 | SwrContext *resampler_context, |
| 440 | int *finished) |
| 441 | { |
| 442 | /** Temporary storage of the input samples of the frame read from the file. */ |
| 443 | AVFrame *input_frame = NULL; |
| 444 | /** Temporary storage for the converted input samples. */ |
| 445 | uint8_t **converted_input_samples = NULL; |
| 446 | int data_present; |
| 447 | int ret = AVERROR_EXIT; |
| 448 | |
| 449 | /** Initialize temporary storage for one input frame. */ |
| 450 | if (init_input_frame(&input_frame)) |
| 451 | goto cleanup; |
| 452 | /** Decode one frame worth of audio samples. */ |
| 453 | if (decode_audio_frame(input_frame, input_format_context, |
| 454 | input_codec_context, &data_present, finished)) |
| 455 | goto cleanup; |
| 456 | /** |
| 457 | * If we are at the end of the file and there are no more samples |
| 458 | * in the decoder which are delayed, we are actually finished. |
| 459 | * This must not be treated as an error. |
| 460 | */ |
| 461 | if (*finished && !data_present) { |
| 462 | ret = 0; |
| 463 | goto cleanup; |
| 464 | } |
| 465 | /** If there is decoded data, convert and store it */ |
| 466 | if (data_present) { |
| 467 | /** Initialize the temporary storage for the converted input samples. */ |
| 468 | if (init_converted_samples(&converted_input_samples, output_codec_context, |
| 469 | input_frame->nb_samples)) |
| 470 | goto cleanup; |
| 471 | |
| 472 | /** |
| 473 | * Convert the input samples to the desired output sample format. |
| 474 | * This requires a temporary storage provided by converted_input_samples. |
| 475 | */ |
| 476 | if (convert_samples((const uint8_t**)input_frame->extended_data, converted_input_samples, |
| 477 | input_frame->nb_samples, resampler_context)) |
| 478 | goto cleanup; |
| 479 | |
| 480 | /** Add the converted input samples to the FIFO buffer for later processing. */ |
| 481 | if (add_samples_to_fifo(fifo, converted_input_samples, |
| 482 | input_frame->nb_samples)) |
| 483 | goto cleanup; |
| 484 | ret = 0; |
| 485 | } |
| 486 | ret = 0; |
| 487 | |
| 488 | cleanup: |
| 489 | if (converted_input_samples) { |
| 490 | av_freep(&converted_input_samples[0]); |
| 491 | free(converted_input_samples); |
| 492 | } |
| 493 | av_frame_free(&input_frame); |
| 494 | |
| 495 | return ret; |
| 496 | } |
| 497 | |
| 498 | /** |
| 499 | * Initialize one input frame for writing to the output file. |
| 500 | * The frame will be exactly frame_size samples large. |
| 501 | */ |
| 502 | static int init_output_frame(AVFrame **frame, |
| 503 | AVCodecContext *output_codec_context, |
| 504 | int frame_size) |
| 505 | { |
| 506 | int error; |
| 507 | |
| 508 | /** Create a new frame to store the audio samples. */ |
| 509 | if (!(*frame = av_frame_alloc())) { |
| 510 | fprintf(stderr, "Could not allocate output frame\n"); |
| 511 | return AVERROR_EXIT; |
| 512 | } |
| 513 | |
| 514 | /** |
| 515 | * Set the frame's parameters, especially its size and format. |
| 516 | * av_frame_get_buffer needs this to allocate memory for the |
| 517 | * audio samples of the frame. |
| 518 | * Default channel layouts based on the number of channels |
| 519 | * are assumed for simplicity. |
| 520 | */ |
| 521 | (*frame)->nb_samples = frame_size; |
| 522 | (*frame)->channel_layout = output_codec_context->channel_layout; |
| 523 | (*frame)->format = output_codec_context->sample_fmt; |
| 524 | (*frame)->sample_rate = output_codec_context->sample_rate; |
| 525 | |
| 526 | /** |
| 527 | * Allocate the samples of the created frame. This call will make |
| 528 | * sure that the audio frame can hold as many samples as specified. |
| 529 | */ |
| 530 | if ((error = av_frame_get_buffer(*frame, 0)) < 0) { |
| 531 | fprintf(stderr, "Could allocate output frame samples (error '%s')\n", |
| 532 | get_error_text(error)); |
| 533 | av_frame_free(frame); |
| 534 | return error; |
| 535 | } |
| 536 | |
| 537 | return 0; |
| 538 | } |
| 539 | |
| 540 | /** Encode one frame worth of audio to the output file. */ |
| 541 | static int encode_audio_frame(AVFrame *frame, |
| 542 | AVFormatContext *output_format_context, |
| 543 | AVCodecContext *output_codec_context, |
| 544 | int *data_present) |
| 545 | { |
| 546 | /** Packet used for temporary storage. */ |
| 547 | AVPacket output_packet; |
| 548 | int error; |
| 549 | init_packet(&output_packet); |
| 550 | |
| 551 | /** |
| 552 | * Encode the audio frame and store it in the temporary packet. |
| 553 | * The output audio stream encoder is used to do this. |
| 554 | */ |
| 555 | if ((error = avcodec_encode_audio2(output_codec_context, &output_packet, |
| 556 | frame, data_present)) < 0) { |
| 557 | fprintf(stderr, "Could not encode frame (error '%s')\n", |
| 558 | get_error_text(error)); |
| 559 | av_free_packet(&output_packet); |
| 560 | return error; |
| 561 | } |
| 562 | |
| 563 | /** Write one audio frame from the temporary packet to the output file. */ |
| 564 | if (*data_present) { |
| 565 | if ((error = av_write_frame(output_format_context, &output_packet)) < 0) { |
| 566 | fprintf(stderr, "Could not write frame (error '%s')\n", |
| 567 | get_error_text(error)); |
| 568 | av_free_packet(&output_packet); |
| 569 | return error; |
| 570 | } |
| 571 | |
| 572 | av_free_packet(&output_packet); |
| 573 | } |
| 574 | |
| 575 | return 0; |
| 576 | } |
| 577 | |
| 578 | /** |
| 579 | * Load one audio frame from the FIFO buffer, encode and write it to the |
| 580 | * output file. |
| 581 | */ |
| 582 | static int load_encode_and_write(AVAudioFifo *fifo, |
| 583 | AVFormatContext *output_format_context, |
| 584 | AVCodecContext *output_codec_context) |
| 585 | { |
| 586 | /** Temporary storage of the output samples of the frame written to the file. */ |
| 587 | AVFrame *output_frame; |
| 588 | /** |
| 589 | * Use the maximum number of possible samples per frame. |
| 590 | * If there is less than the maximum possible frame size in the FIFO |
| 591 | * buffer use this number. Otherwise, use the maximum possible frame size |
| 592 | */ |
| 593 | const int frame_size = FFMIN(av_audio_fifo_size(fifo), |
| 594 | output_codec_context->frame_size); |
| 595 | int data_written; |
| 596 | |
| 597 | /** Initialize temporary storage for one output frame. */ |
| 598 | if (init_output_frame(&output_frame, output_codec_context, frame_size)) |
| 599 | return AVERROR_EXIT; |
| 600 | |
| 601 | /** |
| 602 | * Read as many samples from the FIFO buffer as required to fill the frame. |
| 603 | * The samples are stored in the frame temporarily. |
| 604 | */ |
| 605 | if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) { |
| 606 | fprintf(stderr, "Could not read data from FIFO\n"); |
| 607 | av_frame_free(&output_frame); |
| 608 | return AVERROR_EXIT; |
| 609 | } |
| 610 | |
| 611 | /** Encode one frame worth of audio samples. */ |
| 612 | if (encode_audio_frame(output_frame, output_format_context, |
| 613 | output_codec_context, &data_written)) { |
| 614 | av_frame_free(&output_frame); |
| 615 | return AVERROR_EXIT; |
| 616 | } |
| 617 | av_frame_free(&output_frame); |
| 618 | return 0; |
| 619 | } |
| 620 | |
| 621 | /** Write the trailer of the output file container. */ |
| 622 | static int write_output_file_trailer(AVFormatContext *output_format_context) |
| 623 | { |
| 624 | int error; |
| 625 | if ((error = av_write_trailer(output_format_context)) < 0) { |
| 626 | fprintf(stderr, "Could not write output file trailer (error '%s')\n", |
| 627 | get_error_text(error)); |
| 628 | return error; |
| 629 | } |
| 630 | return 0; |
| 631 | } |
| 632 | |
| 633 | /** Convert an audio file to an AAC file in an MP4 container. */ |
| 634 | int main(int argc, char **argv) |
| 635 | { |
| 636 | AVFormatContext *input_format_context = NULL, *output_format_context = NULL; |
| 637 | AVCodecContext *input_codec_context = NULL, *output_codec_context = NULL; |
| 638 | SwrContext *resample_context = NULL; |
| 639 | AVAudioFifo *fifo = NULL; |
| 640 | int ret = AVERROR_EXIT; |
| 641 | |
| 642 | if (argc < 3) { |
| 643 | fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]); |
| 644 | exit(1); |
| 645 | } |
| 646 | |
| 647 | /** Register all codecs and formats so that they can be used. */ |
| 648 | av_register_all(); |
| 649 | /** Open the input file for reading. */ |
| 650 | if (open_input_file(argv[1], &input_format_context, |
| 651 | &input_codec_context)) |
| 652 | goto cleanup; |
| 653 | /** Open the output file for writing. */ |
| 654 | if (open_output_file(argv[2], input_codec_context, |
| 655 | &output_format_context, &output_codec_context)) |
| 656 | goto cleanup; |
| 657 | /** Initialize the resampler to be able to convert audio sample formats. */ |
| 658 | if (init_resampler(input_codec_context, output_codec_context, |
| 659 | &resample_context)) |
| 660 | goto cleanup; |
| 661 | /** Initialize the FIFO buffer to store audio samples to be encoded. */ |
| 662 | if (init_fifo(&fifo)) |
| 663 | goto cleanup; |
| 664 | /** Write the header of the output file container. */ |
| 665 | if (write_output_file_header(output_format_context)) |
| 666 | goto cleanup; |
| 667 | |
| 668 | /** |
| 669 | * Loop as long as we have input samples to read or output samples |
| 670 | * to write; abort as soon as we have neither. |
| 671 | */ |
| 672 | while (1) { |
| 673 | /** Use the encoder's desired frame size for processing. */ |
| 674 | const int output_frame_size = output_codec_context->frame_size; |
| 675 | int finished = 0; |
| 676 | |
| 677 | /** |
| 678 | * Make sure that there is one frame worth of samples in the FIFO |
| 679 | * buffer so that the encoder can do its work. |
| 680 | * Since the decoder's and the encoder's frame size may differ, we |
| 681 | * need to FIFO buffer to store as many frames worth of input samples |
| 682 | * that they make up at least one frame worth of output samples. |
| 683 | */ |
| 684 | while (av_audio_fifo_size(fifo) < output_frame_size) { |
| 685 | /** |
| 686 | * Decode one frame worth of audio samples, convert it to the |
| 687 | * output sample format and put it into the FIFO buffer. |
| 688 | */ |
| 689 | if (read_decode_convert_and_store(fifo, input_format_context, |
| 690 | input_codec_context, |
| 691 | output_codec_context, |
| 692 | resample_context, &finished)) |
| 693 | goto cleanup; |
| 694 | |
| 695 | /** |
| 696 | * If we are at the end of the input file, we continue |
| 697 | * encoding the remaining audio samples to the output file. |
| 698 | */ |
| 699 | if (finished) |
| 700 | break; |
| 701 | } |
| 702 | |
| 703 | /** |
| 704 | * If we have enough samples for the encoder, we encode them. |
| 705 | * At the end of the file, we pass the remaining samples to |
| 706 | * the encoder. |
| 707 | */ |
| 708 | while (av_audio_fifo_size(fifo) >= output_frame_size || |
| 709 | (finished && av_audio_fifo_size(fifo) > 0)) |
| 710 | /** |
| 711 | * Take one frame worth of audio samples from the FIFO buffer, |
| 712 | * encode it and write it to the output file. |
| 713 | */ |
| 714 | if (load_encode_and_write(fifo, output_format_context, |
| 715 | output_codec_context)) |
| 716 | goto cleanup; |
| 717 | |
| 718 | /** |
| 719 | * If we are at the end of the input file and have encoded |
| 720 | * all remaining samples, we can exit this loop and finish. |
| 721 | */ |
| 722 | if (finished) { |
| 723 | int data_written; |
| 724 | /** Flush the encoder as it may have delayed frames. */ |
| 725 | do { |
| 726 | if (encode_audio_frame(NULL, output_format_context, |
| 727 | output_codec_context, &data_written)) |
| 728 | goto cleanup; |
| 729 | } while (data_written); |
| 730 | break; |
| 731 | } |
| 732 | } |
| 733 | |
| 734 | /** Write the trailer of the output file container. */ |
| 735 | if (write_output_file_trailer(output_format_context)) |
| 736 | goto cleanup; |
| 737 | ret = 0; |
| 738 | |
| 739 | cleanup: |
| 740 | if (fifo) |
| 741 | av_audio_fifo_free(fifo); |
| 742 | swr_free(&resample_context); |
| 743 | if (output_codec_context) |
| 744 | avcodec_close(output_codec_context); |
| 745 | if (output_format_context) { |
| 746 | avio_close(output_format_context->pb); |
| 747 | avformat_free_context(output_format_context); |
| 748 | } |
| 749 | if (input_codec_context) |
| 750 | avcodec_close(input_codec_context); |
| 751 | if (input_format_context) |
| 752 | avformat_close_input(&input_format_context); |
| 753 | |
| 754 | return ret; |
| 755 | } |