Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * WebM DASH Manifest XML muxer | |
3 | * Copyright (c) 2014 Vignesh Venkatasubramanian | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /* | |
23 | * WebM DASH Specification: | |
24 | * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification | |
25 | */ | |
26 | ||
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "avformat.h"
#include "avio_internal.h"
#include "matroska.h"

#include "libavutil/avstring.h"
#include "libavutil/dict.h"
#include "libavutil/opt.h"
37 | ||
/*
 * One Adaptation Set as described by the "adaptation_sets" option:
 * an identifier plus the input streams that belong to the set.
 */
typedef struct AdaptationSet {
    char id[10];    /* NUL-terminated identifier, written as the AdaptationSet "id" attribute */
    int *streams;   /* heap-allocated array of indexes into AVFormatContext.streams */
    int nb_streams; /* number of valid entries in streams */
} AdaptationSet;
43 | ||
44 | typedef struct WebMDashMuxContext { | |
45 | const AVClass *class; | |
46 | char *adaptation_sets; | |
47 | AdaptationSet *as; | |
48 | int nb_as; | |
f6fa7814 | 49 | int representation_id; |
2ba45a60 DM |
50 | } WebMDashMuxContext; |
51 | ||
52 | static const char *get_codec_name(int codec_id) | |
53 | { | |
54 | switch (codec_id) { | |
55 | case AV_CODEC_ID_VP8: | |
56 | return "vp8"; | |
57 | case AV_CODEC_ID_VP9: | |
58 | return "vp9"; | |
59 | case AV_CODEC_ID_VORBIS: | |
60 | return "vorbis"; | |
61 | case AV_CODEC_ID_OPUS: | |
62 | return "opus"; | |
63 | } | |
64 | return NULL; | |
65 | } | |
66 | ||
67 | static double get_duration(AVFormatContext *s) | |
68 | { | |
69 | int i = 0; | |
70 | double max = 0.0; | |
71 | for (i = 0; i < s->nb_streams; i++) { | |
72 | AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata, | |
73 | DURATION, NULL, 0); | |
74 | if (!duration || atof(duration->value) < 0) continue; | |
75 | if (atof(duration->value) > max) max = atof(duration->value); | |
76 | } | |
77 | return max / 1000; | |
78 | } | |
79 | ||
80 | static void write_header(AVFormatContext *s) | |
81 | { | |
82 | double min_buffer_time = 1.0; | |
83 | avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); | |
84 | avio_printf(s->pb, "<MPD\n"); | |
85 | avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"); | |
86 | avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n"); | |
87 | avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n"); | |
88 | avio_printf(s->pb, " type=\"static\"\n"); | |
89 | avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n", | |
90 | get_duration(s)); | |
91 | avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n", | |
92 | min_buffer_time); | |
93 | avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\""); | |
94 | avio_printf(s->pb, ">\n"); | |
95 | } | |
96 | ||
97 | static void write_footer(AVFormatContext *s) | |
98 | { | |
f6fa7814 | 99 | avio_printf(s->pb, "</MPD>\n"); |
2ba45a60 DM |
100 | } |
101 | ||
102 | static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) { | |
103 | int i; | |
104 | AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata, | |
105 | CUE_TIMESTAMPS, NULL, 0); | |
106 | if (!gold) return 0; | |
107 | for (i = 1; i < as->nb_streams; i++) { | |
108 | AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata, | |
109 | CUE_TIMESTAMPS, NULL, 0); | |
110 | if (!ts || strncmp(gold->value, ts->value, strlen(gold->value))) return 0; | |
111 | } | |
112 | return 1; | |
113 | } | |
114 | ||
115 | static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) { | |
116 | int i; | |
117 | AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata, | |
118 | TRACK_NUMBER, NULL, 0); | |
119 | AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec; | |
120 | if (!gold_track_num) return 0; | |
121 | for (i = 1; i < as->nb_streams; i++) { | |
122 | AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata, | |
123 | TRACK_NUMBER, NULL, 0); | |
124 | AVCodecContext *codec = s->streams[as->streams[i]]->codec; | |
125 | if (!track_num || | |
126 | strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) || | |
127 | gold_codec->codec_id != codec->codec_id || | |
128 | gold_codec->extradata_size != codec->extradata_size || | |
129 | memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) { | |
130 | return 0; | |
131 | } | |
132 | } | |
133 | return 1; | |
134 | } | |
135 | ||
f6fa7814 DM |
136 | /* |
137 | * Writes a Representation within an Adaptation Set. Returns 0 on success and | |
138 | * < 0 on failure. | |
139 | */ | |
140 | static int write_representation(AVFormatContext *s, AVStream *stream, int id, | |
141 | int output_width, int output_height, | |
142 | int output_sample_rate) { | |
143 | AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0); | |
144 | AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0); | |
145 | AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0); | |
146 | AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0); | |
147 | AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0); | |
148 | if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL || | |
149 | !bandwidth) { | |
150 | return -1; | |
151 | } | |
152 | avio_printf(s->pb, "<Representation id=\"%d\"", id); | |
153 | avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value); | |
154 | if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width) | |
155 | avio_printf(s->pb, " width=\"%d\"", stream->codec->width); | |
156 | if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height) | |
157 | avio_printf(s->pb, " height=\"%d\"", stream->codec->height); | |
158 | if (stream->codec->codec_type = AVMEDIA_TYPE_AUDIO && output_sample_rate) | |
159 | avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate); | |
160 | avio_printf(s->pb, ">\n"); | |
161 | avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value); | |
162 | avio_printf(s->pb, "<SegmentBase\n"); | |
163 | avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value); | |
164 | avio_printf(s->pb, "<Initialization\n"); | |
165 | avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value); | |
166 | avio_printf(s->pb, "</SegmentBase>\n"); | |
167 | avio_printf(s->pb, "</Representation>\n"); | |
168 | return 0; | |
169 | } | |
170 | ||
171 | /* | |
172 | * Checks if width of all streams are the same. Returns 1 if true, 0 otherwise. | |
173 | */ | |
174 | static int check_matching_width(AVFormatContext *s, AdaptationSet *as) { | |
175 | int first_width, i; | |
176 | if (as->nb_streams < 2) return 1; | |
177 | first_width = s->streams[as->streams[0]]->codec->width; | |
178 | for (i = 1; i < as->nb_streams; i++) | |
179 | if (first_width != s->streams[as->streams[i]]->codec->width) | |
180 | return 0; | |
181 | return 1; | |
182 | } | |
183 | ||
184 | /* | |
185 | * Checks if height of all streams are the same. Returns 1 if true, 0 otherwise. | |
186 | */ | |
187 | static int check_matching_height(AVFormatContext *s, AdaptationSet *as) { | |
188 | int first_height, i; | |
189 | if (as->nb_streams < 2) return 1; | |
190 | first_height = s->streams[as->streams[0]]->codec->height; | |
191 | for (i = 1; i < as->nb_streams; i++) | |
192 | if (first_height != s->streams[as->streams[i]]->codec->height) | |
193 | return 0; | |
194 | return 1; | |
195 | } | |
196 | ||
197 | /* | |
198 | * Checks if sample rate of all streams are the same. Returns 1 if true, 0 otherwise. | |
199 | */ | |
200 | static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) { | |
201 | int first_sample_rate, i; | |
202 | if (as->nb_streams < 2) return 1; | |
203 | first_sample_rate = s->streams[as->streams[0]]->codec->sample_rate; | |
204 | for (i = 1; i < as->nb_streams; i++) | |
205 | if (first_sample_rate != s->streams[as->streams[i]]->codec->sample_rate) | |
206 | return 0; | |
207 | return 1; | |
208 | } | |
209 | ||
2ba45a60 DM |
210 | /* |
211 | * Writes an Adaptation Set. Returns 0 on success and < 0 on failure. | |
212 | */ | |
213 | static int write_adaptation_set(AVFormatContext *s, int as_index) | |
214 | { | |
215 | WebMDashMuxContext *w = s->priv_data; | |
216 | AdaptationSet *as = &w->as[as_index]; | |
217 | AVCodecContext *codec = s->streams[as->streams[0]]->codec; | |
f6fa7814 | 218 | AVDictionaryEntry *lang; |
2ba45a60 DM |
219 | int i; |
220 | static const char boolean[2][6] = { "false", "true" }; | |
221 | int subsegmentStartsWithSAP = 1; | |
f6fa7814 DM |
222 | |
223 | // Width, Height and Sample Rate will go in the AdaptationSet tag if they | |
224 | // are the same for all contained Representations. otherwise, they will go | |
225 | // on their respective Representation tag. | |
226 | int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1; | |
227 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO) { | |
228 | width_in_as = check_matching_width(s, as); | |
229 | height_in_as = check_matching_height(s, as); | |
230 | } else { | |
231 | sample_rate_in_as = check_matching_sample_rate(s, as); | |
232 | } | |
233 | ||
2ba45a60 DM |
234 | avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id); |
235 | avio_printf(s->pb, " mimeType=\"%s/webm\"", | |
236 | codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio"); | |
237 | avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id)); | |
238 | ||
239 | lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0); | |
240 | if (lang) avio_printf(s->pb, " lang=\"%s\"", lang->value); | |
241 | ||
f6fa7814 | 242 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO && width_in_as) |
2ba45a60 | 243 | avio_printf(s->pb, " width=\"%d\"", codec->width); |
f6fa7814 | 244 | if (codec->codec_type == AVMEDIA_TYPE_VIDEO && height_in_as) |
2ba45a60 | 245 | avio_printf(s->pb, " height=\"%d\"", codec->height); |
f6fa7814 | 246 | if (codec->codec_type == AVMEDIA_TYPE_AUDIO && sample_rate_in_as) |
2ba45a60 | 247 | avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate); |
2ba45a60 DM |
248 | |
249 | avio_printf(s->pb, " bitstreamSwitching=\"%s\"", | |
250 | boolean[bitstream_switching(s, as)]); | |
251 | avio_printf(s->pb, " subsegmentAlignment=\"%s\"", | |
252 | boolean[subsegment_alignment(s, as)]); | |
253 | ||
254 | for (i = 0; i < as->nb_streams; i++) { | |
255 | AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata, | |
256 | CLUSTER_KEYFRAME, NULL, 0); | |
257 | if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0; | |
258 | } | |
259 | avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP); | |
260 | avio_printf(s->pb, ">\n"); | |
261 | ||
262 | for (i = 0; i < as->nb_streams; i++) { | |
f6fa7814 DM |
263 | write_representation(s, s->streams[as->streams[i]], w->representation_id++, |
264 | !width_in_as, !height_in_as, !sample_rate_in_as); | |
2ba45a60 DM |
265 | } |
266 | avio_printf(s->pb, "</AdaptationSet>\n"); | |
267 | return 0; | |
268 | } | |
269 | ||
/*
 * Parses a non-negative decimal integer from the first (len - 1) characters
 * of p; len therefore counts one extra position, as a buffer size including
 * a terminator would. Parsing also stops at a NUL byte inside that range.
 *
 * The token must consist of decimal digits only. Returns the parsed value,
 * or -1 if p is NULL, the token is empty, contains any other character, or
 * does not fit in an int.
 */
static int to_integer(char *p, int len)
{
    int value = 0;
    int i;

    if (!p || len < 2)
        return -1;

    for (i = 0; i < len - 1 && p[i] != '\0'; i++) {
        int digit;

        if (p[i] < '0' || p[i] > '9')
            return -1;
        digit = p[i] - '0';
        // Reject before value * 10 + digit can overflow.
        if (value > (INT_MAX - digit) / 10)
            return -1;
        value = value * 10 + digit;
    }
    return i > 0 ? value : -1;
}
280 | ||
281 | static int parse_adaptation_sets(AVFormatContext *s) | |
282 | { | |
283 | WebMDashMuxContext *w = s->priv_data; | |
284 | char *p = w->adaptation_sets; | |
285 | char *q; | |
286 | enum { new_set, parsed_id, parsing_streams } state; | |
287 | // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on | |
288 | state = new_set; | |
289 | while (p < w->adaptation_sets + strlen(w->adaptation_sets)) { | |
290 | if (*p == ' ') | |
291 | continue; | |
292 | else if (state == new_set && !strncmp(p, "id=", 3)) { | |
293 | w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as); | |
294 | if (w->as == NULL) return -1; | |
295 | w->as[w->nb_as - 1].nb_streams = 0; | |
296 | w->as[w->nb_as - 1].streams = NULL; | |
297 | p += 3; // consume "id=" | |
298 | q = w->as[w->nb_as - 1].id; | |
299 | while (*p != ',') *q++ = *p++; | |
300 | *q = 0; | |
301 | p++; | |
302 | state = parsed_id; | |
303 | } else if (state == parsed_id && !strncmp(p, "streams=", 8)) { | |
304 | p += 8; // consume "streams=" | |
305 | state = parsing_streams; | |
306 | } else if (state == parsing_streams) { | |
307 | struct AdaptationSet *as = &w->as[w->nb_as - 1]; | |
308 | q = p; | |
309 | while (*q != '\0' && *q != ',' && *q != ' ') q++; | |
310 | as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams); | |
311 | if (as->streams == NULL) return -1; | |
312 | as->streams[as->nb_streams - 1] = to_integer(p, q - p + 1); | |
313 | if (as->streams[as->nb_streams - 1] < 0) return -1; | |
314 | if (*q == '\0') break; | |
315 | if (*q == ' ') state = new_set; | |
316 | p = ++q; | |
317 | } else { | |
318 | return -1; | |
319 | } | |
320 | } | |
321 | return 0; | |
322 | } | |
323 | ||
324 | static int webm_dash_manifest_write_header(AVFormatContext *s) | |
325 | { | |
326 | int i; | |
327 | double start = 0.0; | |
328 | WebMDashMuxContext *w = s->priv_data; | |
329 | parse_adaptation_sets(s); | |
330 | write_header(s); | |
331 | avio_printf(s->pb, "<Period id=\"0\""); | |
332 | avio_printf(s->pb, " start=\"PT%gS\"", start); | |
333 | avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s)); | |
334 | avio_printf(s->pb, " >\n"); | |
335 | ||
336 | for (i = 0; i < w->nb_as; i++) { | |
337 | if (write_adaptation_set(s, i) < 0) return -1; | |
338 | } | |
339 | ||
340 | avio_printf(s->pb, "</Period>\n"); | |
341 | write_footer(s); | |
342 | return 0; | |
343 | } | |
344 | ||
345 | static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt) | |
346 | { | |
347 | return AVERROR_EOF; | |
348 | } | |
349 | ||
350 | static int webm_dash_manifest_write_trailer(AVFormatContext *s) | |
351 | { | |
352 | WebMDashMuxContext *w = s->priv_data; | |
353 | int i; | |
354 | for (i = 0; i < w->nb_as; i++) { | |
355 | av_freep(&w->as[i].streams); | |
356 | } | |
357 | av_freep(&w->as); | |
358 | return 0; | |
359 | } | |
360 | ||
361 | #define OFFSET(x) offsetof(WebMDashMuxContext, x) | |
362 | static const AVOption options[] = { | |
363 | { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, | |
364 | { NULL }, | |
365 | }; | |
366 | ||
#if CONFIG_WEBM_DASH_MANIFEST_MUXER
/* Class exposing the private options table of the muxer. */
static const AVClass webm_dash_class = {
    .class_name = "WebM DASH Manifest muxer",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* Muxer definition: an XML output format driven by the three callbacks above. */
AVOutputFormat ff_webm_dash_manifest_muxer = {
    .name           = "webm_dash_manifest",
    .long_name      = NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
    .mime_type      = "application/xml",
    .extensions     = "xml",
    .priv_data_size = sizeof(WebMDashMuxContext),
    .priv_class     = &webm_dash_class,
    .write_header   = webm_dash_manifest_write_header,
    .write_packet   = webm_dash_manifest_write_packet,
    .write_trailer  = webm_dash_manifest_write_trailer,
};
#endif