| 1 | /* |
| 2 | * WebM DASH Manifest XML muxer |
| 3 | * Copyright (c) 2014 Vignesh Venkatasubramanian |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | /* |
| 23 | * WebM DASH Specification: |
| 24 | * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification |
| 25 | */ |
| 26 | |
| 27 | #include <stdint.h> |
| 28 | #include <string.h> |
| 29 | |
| 30 | #include "avformat.h" |
| 31 | #include "avio_internal.h" |
| 32 | #include "matroska.h" |
| 33 | |
| 34 | #include "libavutil/avstring.h" |
| 35 | #include "libavutil/dict.h" |
| 36 | #include "libavutil/opt.h" |
| 37 | |
/* One Adaptation Set as described by the "adaptation_sets" option. */
typedef struct AdaptationSet {
    char id[10];    /* NUL-terminated id text copied out of the option string */
    int *streams;   /* heap array of indices into AVFormatContext.streams */
    int nb_streams; /* number of entries stored in streams */
} AdaptationSet;
| 43 | |
| 44 | typedef struct WebMDashMuxContext { |
| 45 | const AVClass *class; |
| 46 | char *adaptation_sets; |
| 47 | AdaptationSet *as; |
| 48 | int nb_as; |
| 49 | int representation_id; |
| 50 | } WebMDashMuxContext; |
| 51 | |
| 52 | static const char *get_codec_name(int codec_id) |
| 53 | { |
| 54 | switch (codec_id) { |
| 55 | case AV_CODEC_ID_VP8: |
| 56 | return "vp8"; |
| 57 | case AV_CODEC_ID_VP9: |
| 58 | return "vp9"; |
| 59 | case AV_CODEC_ID_VORBIS: |
| 60 | return "vorbis"; |
| 61 | case AV_CODEC_ID_OPUS: |
| 62 | return "opus"; |
| 63 | } |
| 64 | return NULL; |
| 65 | } |
| 66 | |
| 67 | static double get_duration(AVFormatContext *s) |
| 68 | { |
| 69 | int i = 0; |
| 70 | double max = 0.0; |
| 71 | for (i = 0; i < s->nb_streams; i++) { |
| 72 | AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata, |
| 73 | DURATION, NULL, 0); |
| 74 | if (!duration || atof(duration->value) < 0) continue; |
| 75 | if (atof(duration->value) > max) max = atof(duration->value); |
| 76 | } |
| 77 | return max / 1000; |
| 78 | } |
| 79 | |
| 80 | static void write_header(AVFormatContext *s) |
| 81 | { |
| 82 | double min_buffer_time = 1.0; |
| 83 | avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); |
| 84 | avio_printf(s->pb, "<MPD\n"); |
| 85 | avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"); |
| 86 | avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n"); |
| 87 | avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n"); |
| 88 | avio_printf(s->pb, " type=\"static\"\n"); |
| 89 | avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n", |
| 90 | get_duration(s)); |
| 91 | avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n", |
| 92 | min_buffer_time); |
| 93 | avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\""); |
| 94 | avio_printf(s->pb, ">\n"); |
| 95 | } |
| 96 | |
| 97 | static void write_footer(AVFormatContext *s) |
| 98 | { |
| 99 | avio_printf(s->pb, "</MPD>\n"); |
| 100 | } |
| 101 | |
| 102 | static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) { |
| 103 | int i; |
| 104 | AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata, |
| 105 | CUE_TIMESTAMPS, NULL, 0); |
| 106 | if (!gold) return 0; |
| 107 | for (i = 1; i < as->nb_streams; i++) { |
| 108 | AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata, |
| 109 | CUE_TIMESTAMPS, NULL, 0); |
| 110 | if (!ts || strncmp(gold->value, ts->value, strlen(gold->value))) return 0; |
| 111 | } |
| 112 | return 1; |
| 113 | } |
| 114 | |
| 115 | static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) { |
| 116 | int i; |
| 117 | AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata, |
| 118 | TRACK_NUMBER, NULL, 0); |
| 119 | AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec; |
| 120 | if (!gold_track_num) return 0; |
| 121 | for (i = 1; i < as->nb_streams; i++) { |
| 122 | AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata, |
| 123 | TRACK_NUMBER, NULL, 0); |
| 124 | AVCodecContext *codec = s->streams[as->streams[i]]->codec; |
| 125 | if (!track_num || |
| 126 | strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) || |
| 127 | gold_codec->codec_id != codec->codec_id || |
| 128 | gold_codec->extradata_size != codec->extradata_size || |
| 129 | memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) { |
| 130 | return 0; |
| 131 | } |
| 132 | } |
| 133 | return 1; |
| 134 | } |
| 135 | |
| 136 | /* |
| 137 | * Writes a Representation within an Adaptation Set. Returns 0 on success and |
| 138 | * < 0 on failure. |
| 139 | */ |
| 140 | static int write_representation(AVFormatContext *s, AVStream *stream, int id, |
| 141 | int output_width, int output_height, |
| 142 | int output_sample_rate) { |
| 143 | AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0); |
| 144 | AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0); |
| 145 | AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0); |
| 146 | AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0); |
| 147 | AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0); |
| 148 | if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL || |
| 149 | !bandwidth) { |
| 150 | return -1; |
| 151 | } |
| 152 | avio_printf(s->pb, "<Representation id=\"%d\"", id); |
| 153 | avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value); |
| 154 | if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width) |
| 155 | avio_printf(s->pb, " width=\"%d\"", stream->codec->width); |
| 156 | if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height) |
| 157 | avio_printf(s->pb, " height=\"%d\"", stream->codec->height); |
| 158 | if (stream->codec->codec_type = AVMEDIA_TYPE_AUDIO && output_sample_rate) |
| 159 | avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate); |
| 160 | avio_printf(s->pb, ">\n"); |
| 161 | avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value); |
| 162 | avio_printf(s->pb, "<SegmentBase\n"); |
| 163 | avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value); |
| 164 | avio_printf(s->pb, "<Initialization\n"); |
| 165 | avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value); |
| 166 | avio_printf(s->pb, "</SegmentBase>\n"); |
| 167 | avio_printf(s->pb, "</Representation>\n"); |
| 168 | return 0; |
| 169 | } |
| 170 | |
| 171 | /* |
| 172 | * Checks if width of all streams are the same. Returns 1 if true, 0 otherwise. |
| 173 | */ |
| 174 | static int check_matching_width(AVFormatContext *s, AdaptationSet *as) { |
| 175 | int first_width, i; |
| 176 | if (as->nb_streams < 2) return 1; |
| 177 | first_width = s->streams[as->streams[0]]->codec->width; |
| 178 | for (i = 1; i < as->nb_streams; i++) |
| 179 | if (first_width != s->streams[as->streams[i]]->codec->width) |
| 180 | return 0; |
| 181 | return 1; |
| 182 | } |
| 183 | |
| 184 | /* |
| 185 | * Checks if height of all streams are the same. Returns 1 if true, 0 otherwise. |
| 186 | */ |
| 187 | static int check_matching_height(AVFormatContext *s, AdaptationSet *as) { |
| 188 | int first_height, i; |
| 189 | if (as->nb_streams < 2) return 1; |
| 190 | first_height = s->streams[as->streams[0]]->codec->height; |
| 191 | for (i = 1; i < as->nb_streams; i++) |
| 192 | if (first_height != s->streams[as->streams[i]]->codec->height) |
| 193 | return 0; |
| 194 | return 1; |
| 195 | } |
| 196 | |
| 197 | /* |
| 198 | * Checks if sample rate of all streams are the same. Returns 1 if true, 0 otherwise. |
| 199 | */ |
| 200 | static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) { |
| 201 | int first_sample_rate, i; |
| 202 | if (as->nb_streams < 2) return 1; |
| 203 | first_sample_rate = s->streams[as->streams[0]]->codec->sample_rate; |
| 204 | for (i = 1; i < as->nb_streams; i++) |
| 205 | if (first_sample_rate != s->streams[as->streams[i]]->codec->sample_rate) |
| 206 | return 0; |
| 207 | return 1; |
| 208 | } |
| 209 | |
| 210 | /* |
| 211 | * Writes an Adaptation Set. Returns 0 on success and < 0 on failure. |
| 212 | */ |
| 213 | static int write_adaptation_set(AVFormatContext *s, int as_index) |
| 214 | { |
| 215 | WebMDashMuxContext *w = s->priv_data; |
| 216 | AdaptationSet *as = &w->as[as_index]; |
| 217 | AVCodecContext *codec = s->streams[as->streams[0]]->codec; |
| 218 | AVDictionaryEntry *lang; |
| 219 | int i; |
| 220 | static const char boolean[2][6] = { "false", "true" }; |
| 221 | int subsegmentStartsWithSAP = 1; |
| 222 | |
| 223 | // Width, Height and Sample Rate will go in the AdaptationSet tag if they |
| 224 | // are the same for all contained Representations. otherwise, they will go |
| 225 | // on their respective Representation tag. |
| 226 | int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1; |
| 227 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO) { |
| 228 | width_in_as = check_matching_width(s, as); |
| 229 | height_in_as = check_matching_height(s, as); |
| 230 | } else { |
| 231 | sample_rate_in_as = check_matching_sample_rate(s, as); |
| 232 | } |
| 233 | |
| 234 | avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id); |
| 235 | avio_printf(s->pb, " mimeType=\"%s/webm\"", |
| 236 | codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio"); |
| 237 | avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id)); |
| 238 | |
| 239 | lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0); |
| 240 | if (lang) avio_printf(s->pb, " lang=\"%s\"", lang->value); |
| 241 | |
| 242 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO && width_in_as) |
| 243 | avio_printf(s->pb, " width=\"%d\"", codec->width); |
| 244 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO && height_in_as) |
| 245 | avio_printf(s->pb, " height=\"%d\"", codec->height); |
| 246 | if (codec->codec_type == AVMEDIA_TYPE_AUDIO && sample_rate_in_as) |
| 247 | avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate); |
| 248 | |
| 249 | avio_printf(s->pb, " bitstreamSwitching=\"%s\"", |
| 250 | boolean[bitstream_switching(s, as)]); |
| 251 | avio_printf(s->pb, " subsegmentAlignment=\"%s\"", |
| 252 | boolean[subsegment_alignment(s, as)]); |
| 253 | |
| 254 | for (i = 0; i < as->nb_streams; i++) { |
| 255 | AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata, |
| 256 | CLUSTER_KEYFRAME, NULL, 0); |
| 257 | if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0; |
| 258 | } |
| 259 | avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP); |
| 260 | avio_printf(s->pb, ">\n"); |
| 261 | |
| 262 | for (i = 0; i < as->nb_streams; i++) { |
| 263 | write_representation(s, s->streams[as->streams[i]], w->representation_id++, |
| 264 | !width_in_as, !height_in_as, !sample_rate_in_as); |
| 265 | } |
| 266 | avio_printf(s->pb, "</AdaptationSet>\n"); |
| 267 | return 0; |
| 268 | } |
| 269 | |
/*
 * Converts the text at p to an int with atoi(), looking only at the part
 * that fits a buffer of len bytes including the terminating NUL. Returns -1
 * if the temporary buffer cannot be allocated.
 */
static int to_integer(char *p, int len)
{
    char *copy = av_malloc(sizeof(char) * len);
    int value = -1;

    if (copy) {
        av_strlcpy(copy, p, len);
        value = atoi(copy);
        av_free(copy);
    }
    return value;
}
| 280 | |
| 281 | static int parse_adaptation_sets(AVFormatContext *s) |
| 282 | { |
| 283 | WebMDashMuxContext *w = s->priv_data; |
| 284 | char *p = w->adaptation_sets; |
| 285 | char *q; |
| 286 | enum { new_set, parsed_id, parsing_streams } state; |
| 287 | // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on |
| 288 | state = new_set; |
| 289 | while (p < w->adaptation_sets + strlen(w->adaptation_sets)) { |
| 290 | if (*p == ' ') |
| 291 | continue; |
| 292 | else if (state == new_set && !strncmp(p, "id=", 3)) { |
| 293 | w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as); |
| 294 | if (w->as == NULL) return -1; |
| 295 | w->as[w->nb_as - 1].nb_streams = 0; |
| 296 | w->as[w->nb_as - 1].streams = NULL; |
| 297 | p += 3; // consume "id=" |
| 298 | q = w->as[w->nb_as - 1].id; |
| 299 | while (*p != ',') *q++ = *p++; |
| 300 | *q = 0; |
| 301 | p++; |
| 302 | state = parsed_id; |
| 303 | } else if (state == parsed_id && !strncmp(p, "streams=", 8)) { |
| 304 | p += 8; // consume "streams=" |
| 305 | state = parsing_streams; |
| 306 | } else if (state == parsing_streams) { |
| 307 | struct AdaptationSet *as = &w->as[w->nb_as - 1]; |
| 308 | q = p; |
| 309 | while (*q != '\0' && *q != ',' && *q != ' ') q++; |
| 310 | as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams); |
| 311 | if (as->streams == NULL) return -1; |
| 312 | as->streams[as->nb_streams - 1] = to_integer(p, q - p + 1); |
| 313 | if (as->streams[as->nb_streams - 1] < 0) return -1; |
| 314 | if (*q == '\0') break; |
| 315 | if (*q == ' ') state = new_set; |
| 316 | p = ++q; |
| 317 | } else { |
| 318 | return -1; |
| 319 | } |
| 320 | } |
| 321 | return 0; |
| 322 | } |
| 323 | |
| 324 | static int webm_dash_manifest_write_header(AVFormatContext *s) |
| 325 | { |
| 326 | int i; |
| 327 | double start = 0.0; |
| 328 | WebMDashMuxContext *w = s->priv_data; |
| 329 | parse_adaptation_sets(s); |
| 330 | write_header(s); |
| 331 | avio_printf(s->pb, "<Period id=\"0\""); |
| 332 | avio_printf(s->pb, " start=\"PT%gS\"", start); |
| 333 | avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s)); |
| 334 | avio_printf(s->pb, " >\n"); |
| 335 | |
| 336 | for (i = 0; i < w->nb_as; i++) { |
| 337 | if (write_adaptation_set(s, i) < 0) return -1; |
| 338 | } |
| 339 | |
| 340 | avio_printf(s->pb, "</Period>\n"); |
| 341 | write_footer(s); |
| 342 | return 0; |
| 343 | } |
| 344 | |
| 345 | static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt) |
| 346 | { |
| 347 | return AVERROR_EOF; |
| 348 | } |
| 349 | |
| 350 | static int webm_dash_manifest_write_trailer(AVFormatContext *s) |
| 351 | { |
| 352 | WebMDashMuxContext *w = s->priv_data; |
| 353 | int i; |
| 354 | for (i = 0; i < w->nb_as; i++) { |
| 355 | av_freep(&w->as[i].streams); |
| 356 | } |
| 357 | av_freep(&w->as); |
| 358 | return 0; |
| 359 | } |
| 360 | |
| 361 | #define OFFSET(x) offsetof(WebMDashMuxContext, x) |
| 362 | static const AVOption options[] = { |
| 363 | { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, |
| 364 | { NULL }, |
| 365 | }; |
| 366 | |
#if CONFIG_WEBM_DASH_MANIFEST_MUXER
/* AVClass that exposes the private options of the muxer context. */
static const AVClass webm_dash_class = {
    .class_name = "WebM DASH Manifest muxer",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* Muxer definition: an XML output whose callbacks are the functions above. */
AVOutputFormat ff_webm_dash_manifest_muxer = {
    .name           = "webm_dash_manifest",
    .long_name      = NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
    .mime_type      = "application/xml",
    .extensions     = "xml",
    .priv_data_size = sizeof(WebMDashMuxContext),
    .write_header   = webm_dash_manifest_write_header,
    .write_packet   = webm_dash_manifest_write_packet,
    .write_trailer  = webm_dash_manifest_write_trailer,
    .priv_class     = &webm_dash_class,
};
#endif