| 1 | /* |
| 2 | * Copyright (C) 2005 Michael Ahlberg, Måns Rullgård |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person |
| 5 | * obtaining a copy of this software and associated documentation |
| 6 | * files (the "Software"), to deal in the Software without |
| 7 | * restriction, including without limitation the rights to use, copy, |
| 8 | * modify, merge, publish, distribute, sublicense, and/or sell copies |
| 9 | * of the Software, and to permit persons to whom the Software is |
| 10 | * furnished to do so, subject to the following conditions: |
| 11 | * |
| 12 | * The above copyright notice and this permission notice shall be |
| 13 | * included in all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 19 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 22 | * DEALINGS IN THE SOFTWARE. |
| 23 | */ |
| 24 | |
| 25 | #include <stdlib.h> |
| 26 | |
| 27 | #include "libavutil/avstring.h" |
| 28 | #include "libavutil/base64.h" |
| 29 | #include "libavutil/bswap.h" |
| 30 | #include "libavutil/dict.h" |
| 31 | #include "libavcodec/bytestream.h" |
| 32 | #include "libavcodec/get_bits.h" |
| 33 | #include "libavcodec/vorbis_parser.h" |
| 34 | #include "avformat.h" |
| 35 | #include "flac_picture.h" |
| 36 | #include "internal.h" |
| 37 | #include "oggdec.h" |
| 38 | #include "vorbiscomment.h" |
| 39 | #include "replaygain.h" |
| 40 | |
| 41 | static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val) |
| 42 | { |
| 43 | int i, cnum, h, m, s, ms, keylen = strlen(key); |
| 44 | AVChapter *chapter = NULL; |
| 45 | |
| 46 | if (keylen < 9 || sscanf(key, "CHAPTER%03d", &cnum) != 1) |
| 47 | return 0; |
| 48 | |
| 49 | if (keylen <= 10) { |
| 50 | if (sscanf(val, "%02d:%02d:%02d.%03d", &h, &m, &s, &ms) < 4) |
| 51 | return 0; |
| 52 | |
| 53 | avpriv_new_chapter(as, cnum, (AVRational) { 1, 1000 }, |
| 54 | ms + 1000 * (s + 60 * (m + 60 * h)), |
| 55 | AV_NOPTS_VALUE, NULL); |
| 56 | av_free(val); |
| 57 | } else if (!strcmp(key + keylen - 4, "NAME")) { |
| 58 | for (i = 0; i < as->nb_chapters; i++) |
| 59 | if (as->chapters[i]->id == cnum) { |
| 60 | chapter = as->chapters[i]; |
| 61 | break; |
| 62 | } |
| 63 | if (!chapter) |
| 64 | return 0; |
| 65 | |
| 66 | av_dict_set(&chapter->metadata, "title", val, AV_DICT_DONT_STRDUP_VAL); |
| 67 | } else |
| 68 | return 0; |
| 69 | |
| 70 | av_free(key); |
| 71 | return 1; |
| 72 | } |
| 73 | |
| 74 | int ff_vorbis_stream_comment(AVFormatContext *as, AVStream *st, |
| 75 | const uint8_t *buf, int size) |
| 76 | { |
| 77 | int updates = ff_vorbis_comment(as, &st->metadata, buf, size, 1); |
| 78 | |
| 79 | if (updates > 0) { |
| 80 | st->event_flags |= AVSTREAM_EVENT_FLAG_METADATA_UPDATED; |
| 81 | } |
| 82 | |
| 83 | return updates; |
| 84 | } |
| 85 | |
| 86 | int ff_vorbis_comment(AVFormatContext *as, AVDictionary **m, |
| 87 | const uint8_t *buf, int size, |
| 88 | int parse_picture) |
| 89 | { |
| 90 | const uint8_t *p = buf; |
| 91 | const uint8_t *end = buf + size; |
| 92 | int updates = 0; |
| 93 | unsigned n, j; |
| 94 | int s; |
| 95 | |
| 96 | /* must have vendor_length and user_comment_list_length */ |
| 97 | if (size < 8) |
| 98 | return AVERROR_INVALIDDATA; |
| 99 | |
| 100 | s = bytestream_get_le32(&p); |
| 101 | |
| 102 | if (end - p - 4 < s || s < 0) |
| 103 | return AVERROR_INVALIDDATA; |
| 104 | |
| 105 | p += s; |
| 106 | |
| 107 | n = bytestream_get_le32(&p); |
| 108 | |
| 109 | while (end - p >= 4 && n > 0) { |
| 110 | const char *t, *v; |
| 111 | int tl, vl; |
| 112 | |
| 113 | s = bytestream_get_le32(&p); |
| 114 | |
| 115 | if (end - p < s || s < 0) |
| 116 | break; |
| 117 | |
| 118 | t = p; |
| 119 | p += s; |
| 120 | n--; |
| 121 | |
| 122 | v = memchr(t, '=', s); |
| 123 | if (!v) |
| 124 | continue; |
| 125 | |
| 126 | tl = v - t; |
| 127 | vl = s - tl - 1; |
| 128 | v++; |
| 129 | |
| 130 | if (tl && vl) { |
| 131 | char *tt, *ct; |
| 132 | |
| 133 | tt = av_malloc(tl + 1); |
| 134 | ct = av_malloc(vl + 1); |
| 135 | if (!tt || !ct) { |
| 136 | av_freep(&tt); |
| 137 | av_freep(&ct); |
| 138 | return AVERROR(ENOMEM); |
| 139 | } |
| 140 | |
| 141 | for (j = 0; j < tl; j++) |
| 142 | tt[j] = av_toupper(t[j]); |
| 143 | tt[tl] = 0; |
| 144 | |
| 145 | memcpy(ct, v, vl); |
| 146 | ct[vl] = 0; |
| 147 | |
| 148 | /* The format in which the pictures are stored is the FLAC format. |
| 149 | * Xiph says: "The binary FLAC picture structure is base64 encoded |
| 150 | * and placed within a VorbisComment with the tag name |
| 151 | * 'METADATA_BLOCK_PICTURE'. This is the preferred and |
| 152 | * recommended way of embedding cover art within VorbisComments." |
| 153 | */ |
| 154 | if (!strcmp(tt, "METADATA_BLOCK_PICTURE") && parse_picture) { |
| 155 | int ret; |
| 156 | char *pict = av_malloc(vl); |
| 157 | |
| 158 | if (!pict) { |
| 159 | av_log(as, AV_LOG_WARNING, "out-of-memory error. Skipping cover art block.\n"); |
| 160 | av_freep(&tt); |
| 161 | av_freep(&ct); |
| 162 | continue; |
| 163 | } |
| 164 | if ((ret = av_base64_decode(pict, ct, vl)) > 0) |
| 165 | ret = ff_flac_parse_picture(as, pict, ret); |
| 166 | av_freep(&tt); |
| 167 | av_freep(&ct); |
| 168 | av_freep(&pict); |
| 169 | if (ret < 0) { |
| 170 | av_log(as, AV_LOG_WARNING, "Failed to parse cover art block.\n"); |
| 171 | continue; |
| 172 | } |
| 173 | } else if (!ogm_chapter(as, tt, ct)) { |
| 174 | updates++; |
| 175 | if (av_dict_get(*m, tt, NULL, 0)) { |
| 176 | av_dict_set(m, tt, ";", AV_DICT_APPEND); |
| 177 | } |
| 178 | av_dict_set(m, tt, ct, |
| 179 | AV_DICT_DONT_STRDUP_KEY | |
| 180 | AV_DICT_APPEND); |
| 181 | av_freep(&ct); |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | if (p != end) |
| 187 | av_log(as, AV_LOG_INFO, |
| 188 | "%"PTRDIFF_SPECIFIER" bytes of comment header remain\n", end - p); |
| 189 | if (n > 0) |
| 190 | av_log(as, AV_LOG_INFO, |
| 191 | "truncated comment header, %i comments not found\n", n); |
| 192 | |
| 193 | ff_metadata_conv(m, NULL, ff_vorbiscomment_metadata_conv); |
| 194 | |
| 195 | return updates; |
| 196 | } |
| 197 | |
/*
 * Parse the vorbis header
 *
 * Vorbis Identification header from Vorbis_I_spec.html#vorbis-spec-codec
 * [vorbis_version]    = read 32 bits as unsigned integer | Checked (must be 0)
 * [audio_channels]    = read 8 bit integer as unsigned   | Used
 * [audio_sample_rate] = read 32 bits as unsigned integer | Used
 * [bitrate_maximum]   = read 32 bits as signed integer   | Not used
 * [bitrate_nominal]   = read 32 bits as signed integer   | Used as bitrate
 * [bitrate_minimum]   = read 32 bits as signed integer   | Not used
 * [blocksize_0]       = read 4 bits as unsigned integer  | Validated only
 * [blocksize_1]       = read 4 bits as unsigned integer  | Validated only
 * [framing_flag]      = read one bit                     | Checked (must be set)
 */
| 212 | |
| 213 | struct oggvorbis_private { |
| 214 | unsigned int len[3]; |
| 215 | unsigned char *packet[3]; |
| 216 | VorbisParseContext vp; |
| 217 | int64_t final_pts; |
| 218 | int final_duration; |
| 219 | }; |
| 220 | |
| 221 | static int fixup_vorbis_headers(AVFormatContext *as, |
| 222 | struct oggvorbis_private *priv, |
| 223 | uint8_t **buf) |
| 224 | { |
| 225 | int i, offset, len, err; |
| 226 | int buf_len; |
| 227 | unsigned char *ptr; |
| 228 | |
| 229 | len = priv->len[0] + priv->len[1] + priv->len[2]; |
| 230 | buf_len = len + len / 255 + 64; |
| 231 | ptr = *buf = av_realloc(NULL, buf_len); |
| 232 | if (!ptr) |
| 233 | return AVERROR(ENOMEM); |
| 234 | memset(*buf, '\0', buf_len); |
| 235 | |
| 236 | ptr[0] = 2; |
| 237 | offset = 1; |
| 238 | offset += av_xiphlacing(&ptr[offset], priv->len[0]); |
| 239 | offset += av_xiphlacing(&ptr[offset], priv->len[1]); |
| 240 | for (i = 0; i < 3; i++) { |
| 241 | memcpy(&ptr[offset], priv->packet[i], priv->len[i]); |
| 242 | offset += priv->len[i]; |
| 243 | av_freep(&priv->packet[i]); |
| 244 | } |
| 245 | if ((err = av_reallocp(buf, offset + FF_INPUT_BUFFER_PADDING_SIZE)) < 0) |
| 246 | return err; |
| 247 | return offset; |
| 248 | } |
| 249 | |
| 250 | static void vorbis_cleanup(AVFormatContext *s, int idx) |
| 251 | { |
| 252 | struct ogg *ogg = s->priv_data; |
| 253 | struct ogg_stream *os = ogg->streams + idx; |
| 254 | struct oggvorbis_private *priv = os->private; |
| 255 | int i; |
| 256 | if (os->private) |
| 257 | for (i = 0; i < 3; i++) |
| 258 | av_freep(&priv->packet[i]); |
| 259 | } |
| 260 | |
| 261 | static int vorbis_update_metadata(AVFormatContext *s, int idx) |
| 262 | { |
| 263 | struct ogg *ogg = s->priv_data; |
| 264 | struct ogg_stream *os = ogg->streams + idx; |
| 265 | AVStream *st = s->streams[idx]; |
| 266 | int ret; |
| 267 | |
| 268 | if (os->psize <= 8) |
| 269 | return 0; |
| 270 | |
| 271 | /* New metadata packet; release old data. */ |
| 272 | av_dict_free(&st->metadata); |
| 273 | ret = ff_vorbis_stream_comment(s, st, os->buf + os->pstart + 7, |
| 274 | os->psize - 8); |
| 275 | if (ret < 0) |
| 276 | return ret; |
| 277 | |
| 278 | /* Update the metadata if possible. */ |
| 279 | av_freep(&os->new_metadata); |
| 280 | if (st->metadata) { |
| 281 | os->new_metadata = av_packet_pack_dictionary(st->metadata, &os->new_metadata_size); |
| 282 | /* Send an empty dictionary to indicate that metadata has been cleared. */ |
| 283 | } else { |
| 284 | os->new_metadata = av_malloc(1); |
| 285 | os->new_metadata_size = 0; |
| 286 | } |
| 287 | |
| 288 | return ret; |
| 289 | } |
| 290 | |
| 291 | static int vorbis_header(AVFormatContext *s, int idx) |
| 292 | { |
| 293 | struct ogg *ogg = s->priv_data; |
| 294 | AVStream *st = s->streams[idx]; |
| 295 | struct ogg_stream *os = ogg->streams + idx; |
| 296 | struct oggvorbis_private *priv; |
| 297 | int pkt_type = os->buf[os->pstart]; |
| 298 | |
| 299 | if (!os->private) { |
| 300 | os->private = av_mallocz(sizeof(struct oggvorbis_private)); |
| 301 | if (!os->private) |
| 302 | return AVERROR(ENOMEM); |
| 303 | } |
| 304 | |
| 305 | if (!(pkt_type & 1)) |
| 306 | return 0; |
| 307 | |
| 308 | if (os->psize < 1 || pkt_type > 5) |
| 309 | return AVERROR_INVALIDDATA; |
| 310 | |
| 311 | priv = os->private; |
| 312 | |
| 313 | if (priv->packet[pkt_type >> 1]) |
| 314 | return AVERROR_INVALIDDATA; |
| 315 | if (pkt_type > 1 && !priv->packet[0] || pkt_type > 3 && !priv->packet[1]) |
| 316 | return AVERROR_INVALIDDATA; |
| 317 | |
| 318 | priv->len[pkt_type >> 1] = os->psize; |
| 319 | priv->packet[pkt_type >> 1] = av_mallocz(os->psize); |
| 320 | if (!priv->packet[pkt_type >> 1]) |
| 321 | return AVERROR(ENOMEM); |
| 322 | memcpy(priv->packet[pkt_type >> 1], os->buf + os->pstart, os->psize); |
| 323 | if (os->buf[os->pstart] == 1) { |
| 324 | const uint8_t *p = os->buf + os->pstart + 7; /* skip "\001vorbis" tag */ |
| 325 | unsigned blocksize, bs0, bs1; |
| 326 | int srate; |
| 327 | int channels; |
| 328 | |
| 329 | if (os->psize != 30) |
| 330 | return AVERROR_INVALIDDATA; |
| 331 | |
| 332 | if (bytestream_get_le32(&p) != 0) /* vorbis_version */ |
| 333 | return AVERROR_INVALIDDATA; |
| 334 | |
| 335 | channels = bytestream_get_byte(&p); |
| 336 | if (st->codec->channels && channels != st->codec->channels) { |
| 337 | av_log(s, AV_LOG_ERROR, "Channel change is not supported\n"); |
| 338 | return AVERROR_PATCHWELCOME; |
| 339 | } |
| 340 | st->codec->channels = channels; |
| 341 | srate = bytestream_get_le32(&p); |
| 342 | p += 4; // skip maximum bitrate |
| 343 | st->codec->bit_rate = bytestream_get_le32(&p); // nominal bitrate |
| 344 | p += 4; // skip minimum bitrate |
| 345 | |
| 346 | blocksize = bytestream_get_byte(&p); |
| 347 | bs0 = blocksize & 15; |
| 348 | bs1 = blocksize >> 4; |
| 349 | |
| 350 | if (bs0 > bs1) |
| 351 | return AVERROR_INVALIDDATA; |
| 352 | if (bs0 < 6 || bs1 > 13) |
| 353 | return AVERROR_INVALIDDATA; |
| 354 | |
| 355 | if (bytestream_get_byte(&p) != 1) /* framing_flag */ |
| 356 | return AVERROR_INVALIDDATA; |
| 357 | |
| 358 | st->codec->codec_type = AVMEDIA_TYPE_AUDIO; |
| 359 | st->codec->codec_id = AV_CODEC_ID_VORBIS; |
| 360 | |
| 361 | if (srate > 0) { |
| 362 | st->codec->sample_rate = srate; |
| 363 | avpriv_set_pts_info(st, 64, 1, srate); |
| 364 | } |
| 365 | } else if (os->buf[os->pstart] == 3) { |
| 366 | if (vorbis_update_metadata(s, idx) >= 0 && priv->len[1] > 10) { |
| 367 | unsigned new_len; |
| 368 | |
| 369 | int ret = ff_replaygain_export(st, st->metadata); |
| 370 | if (ret < 0) |
| 371 | return ret; |
| 372 | |
| 373 | // drop all metadata we parsed and which is not required by libvorbis |
| 374 | new_len = 7 + 4 + AV_RL32(priv->packet[1] + 7) + 4 + 1; |
| 375 | if (new_len >= 16 && new_len < os->psize) { |
| 376 | AV_WL32(priv->packet[1] + new_len - 5, 0); |
| 377 | priv->packet[1][new_len - 1] = 1; |
| 378 | priv->len[1] = new_len; |
| 379 | } |
| 380 | } |
| 381 | } else { |
| 382 | int ret = fixup_vorbis_headers(s, priv, &st->codec->extradata); |
| 383 | if (ret < 0) { |
| 384 | st->codec->extradata_size = 0; |
| 385 | return ret; |
| 386 | } |
| 387 | st->codec->extradata_size = ret; |
| 388 | if ((ret = avpriv_vorbis_parse_extradata(st->codec, &priv->vp))) { |
| 389 | av_freep(&st->codec->extradata); |
| 390 | st->codec->extradata_size = 0; |
| 391 | return ret; |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | return 1; |
| 396 | } |
| 397 | |
| 398 | static int vorbis_packet(AVFormatContext *s, int idx) |
| 399 | { |
| 400 | struct ogg *ogg = s->priv_data; |
| 401 | struct ogg_stream *os = ogg->streams + idx; |
| 402 | struct oggvorbis_private *priv = os->private; |
| 403 | int duration, flags = 0; |
| 404 | |
| 405 | /* first packet handling |
| 406 | * here we parse the duration of each packet in the first page and compare |
| 407 | * the total duration to the page granule to find the encoder delay and |
| 408 | * set the first timestamp */ |
| 409 | if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS) && (int64_t)os->granule>=0) { |
| 410 | int seg, d; |
| 411 | uint8_t *last_pkt = os->buf + os->pstart; |
| 412 | uint8_t *next_pkt = last_pkt; |
| 413 | |
| 414 | avpriv_vorbis_parse_reset(&priv->vp); |
| 415 | duration = 0; |
| 416 | seg = os->segp; |
| 417 | d = avpriv_vorbis_parse_frame_flags(&priv->vp, last_pkt, 1, &flags); |
| 418 | if (d < 0) { |
| 419 | os->pflags |= AV_PKT_FLAG_CORRUPT; |
| 420 | return 0; |
| 421 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 422 | vorbis_update_metadata(s, idx); |
| 423 | flags = 0; |
| 424 | } |
| 425 | duration += d; |
| 426 | last_pkt = next_pkt = next_pkt + os->psize; |
| 427 | for (; seg < os->nsegs; seg++) { |
| 428 | if (os->segments[seg] < 255) { |
| 429 | int d = avpriv_vorbis_parse_frame_flags(&priv->vp, last_pkt, 1, &flags); |
| 430 | if (d < 0) { |
| 431 | duration = os->granule; |
| 432 | break; |
| 433 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 434 | vorbis_update_metadata(s, idx); |
| 435 | flags = 0; |
| 436 | } |
| 437 | duration += d; |
| 438 | last_pkt = next_pkt + os->segments[seg]; |
| 439 | } |
| 440 | next_pkt += os->segments[seg]; |
| 441 | } |
| 442 | os->lastpts = |
| 443 | os->lastdts = os->granule - duration; |
| 444 | |
| 445 | if (!os->granule && duration) //hack to deal with broken files (Ticket3710) |
| 446 | os->lastpts = os->lastdts = AV_NOPTS_VALUE; |
| 447 | |
| 448 | if (s->streams[idx]->start_time == AV_NOPTS_VALUE) { |
| 449 | s->streams[idx]->start_time = FFMAX(os->lastpts, 0); |
| 450 | if (s->streams[idx]->duration != AV_NOPTS_VALUE) |
| 451 | s->streams[idx]->duration -= s->streams[idx]->start_time; |
| 452 | } |
| 453 | priv->final_pts = AV_NOPTS_VALUE; |
| 454 | avpriv_vorbis_parse_reset(&priv->vp); |
| 455 | } |
| 456 | |
| 457 | /* parse packet duration */ |
| 458 | if (os->psize > 0) { |
| 459 | duration = avpriv_vorbis_parse_frame_flags(&priv->vp, os->buf + os->pstart, 1, &flags); |
| 460 | if (duration < 0) { |
| 461 | os->pflags |= AV_PKT_FLAG_CORRUPT; |
| 462 | return 0; |
| 463 | } else if (flags & VORBIS_FLAG_COMMENT) { |
| 464 | vorbis_update_metadata(s, idx); |
| 465 | flags = 0; |
| 466 | } |
| 467 | os->pduration = duration; |
| 468 | } |
| 469 | |
| 470 | /* final packet handling |
| 471 | * here we save the pts of the first packet in the final page, sum up all |
| 472 | * packet durations in the final page except for the last one, and compare |
| 473 | * to the page granule to find the duration of the final packet */ |
| 474 | if (os->flags & OGG_FLAG_EOS) { |
| 475 | if (os->lastpts != AV_NOPTS_VALUE) { |
| 476 | priv->final_pts = os->lastpts; |
| 477 | priv->final_duration = 0; |
| 478 | } |
| 479 | if (os->segp == os->nsegs) |
| 480 | os->pduration = os->granule - priv->final_pts - priv->final_duration; |
| 481 | priv->final_duration += os->pduration; |
| 482 | } |
| 483 | |
| 484 | return 0; |
| 485 | } |
| 486 | |
| 487 | const struct ogg_codec ff_vorbis_codec = { |
| 488 | .magic = "\001vorbis", |
| 489 | .magicsize = 7, |
| 490 | .header = vorbis_header, |
| 491 | .packet = vorbis_packet, |
| 492 | .cleanup = vorbis_cleanup, |
| 493 | .nb_header = 3, |
| 494 | }; |