2 * TED Talks captions format decoder
3 * Copyright (c) 2012 Nicolas George
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/bprint.h"
23 #include "libavutil/log.h"
24 #include "libavutil/opt.h"
27 #include "subtitles.h"
32 FFDemuxSubtitlesQueue subs
;
/*
 * Option table for the TED captions demuxer.
 *
 * The one entry visible here, "start_time", is stored in
 * TEDCaptionsDemuxer.start_time, defaults to 15000 and accepts the whole
 * INT64_MIN..INT64_MAX range; its help text gives the unit as milliseconds.
 * It is flagged as a subtitle decoding parameter.
 *
 * NOTE(review): the terminating entry and the closing of the array are not
 * present in this listing.
 * NOTE(review): FF_OPT_TYPE_INT64 sits next to the .i64 default and the
 * AV_OPT_FLAG_* names -- confirm this tree really provides the FF_ spelling
 * of the option type.
 */
35 static const AVOption tedcaptions_options
[] = {
36 { "start_time", "set the start time (offset) of the subtitles, in ms",
37 offsetof(TEDCaptionsDemuxer
, start_time
), FF_OPT_TYPE_INT64
,
38 { .i64
= 15000 }, INT64_MIN
, INT64_MAX
,
39 AV_OPT_FLAG_SUBTITLE_PARAM
| AV_OPT_FLAG_DECODING_PARAM
},
/*
 * AVClass for the demuxer's private context: it is named
 * "tedcaptions_demuxer", uses av_default_item_name for item names and exposes
 * tedcaptions_options as its option table.
 *
 * NOTE(review): the closing of this initializer is not present in this
 * listing.
 */
43 static const AVClass tedcaptions_demuxer_class
= {
44 .class_name
= "tedcaptions_demuxer",
45 .item_name
= av_default_item_name
,
46 .option
= tedcaptions_options
,
47 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * BETWEEN(a, amin, amax): true when amin <= a <= amax.
 * The difference is cast to unsigned so that values below amin wrap to a
 * large number and fail the single comparison; a negative error code held in
 * an int therefore never matches.  Arguments are evaluated more than once.
 */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))

/*
 * HEX_DIGIT_TEST(c): true when c is an ASCII hexadecimal digit
 * (0-9, a-f, A-F).  OR-ing with 32 folds upper case onto lower case.
 * The letter range stops at 'f': accepting up to 'z' would let non-hex
 * letters through and make HEX_DIGIT_VAL() produce values above 15.
 */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))

/* HEX_DIGIT_VAL(c): value (0..15) of a character accepted by HEX_DIGIT_TEST. */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)

/*
 * ERR_CODE(c): map an unexpected "current byte" to an error code.  A negative
 * value already is an error code and is passed through; anything else becomes
 * AVERROR_INVALIDDATA.  The argument is parenthesised so that expressions can
 * be passed safely.
 */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
/*
 * Append the code point c to bp as a UTF-8 byte sequence.
 *
 * Two paths are visible: one appends c as a single byte, the other derives
 * the number of continuation bytes from av_log2(c), emits a lead byte and
 * then the continuation bytes, most significant group first.
 *
 * NOTE(review): the declarations of bytes and i and the condition selecting
 * the single-byte path are not present in this listing; presumably that path
 * is taken for c <= 0x7F -- confirm against the full file.
 */
56 static void av_bprint_utf8(AVBPrint
*bp
, unsigned c
)
61 av_bprint_chars(bp
, c
, 1);
/* Number of continuation bytes that follow the lead byte. */
64 bytes
= (av_log2(c
) - 2) / 5;
/*
 * Lead byte: the bits of c above the continuation groups, OR-ed with a
 * prefix of (bytes + 1) one-bits obtained from 0xFF80 >> bytes.
 */
65 av_bprint_chars(bp
, (c
>> (bytes
* 6)) | ((0xFF80 >> bytes
) & 0xFF), 1);
/* Continuation bytes: six payload bits each, tagged with 0x80. */
66 for (i
= bytes
- 1; i
>= 0; i
--)
67 av_bprint_chars(bp
, ((c
>> (i
* 6)) & 0x3F) | 0x80, 1);
/*
 * Read one byte from pb with avio_read() and store the outcome in *cur_byte:
 * the byte value when the read returns a positive count, AVERROR_EOF when it
 * returns 0, and the value returned by avio_read() otherwise.  Callers can
 * thus tell data from errors by the sign of *cur_byte.
 *
 * NOTE(review): the declaration of b is not present in this listing.
 */
70 static void next_byte(AVIOContext
*pb
, int *cur_byte
)
73 int ret
= avio_read(pb
, &b
, 1);
74 *cur_byte
= ret
> 0 ? b
: ret
== 0 ? AVERROR_EOF
: ret
;
77 static void skip_spaces(AVIOContext
*pb
, int *cur_byte
)
79 while (*cur_byte
== ' ' || *cur_byte
== '\t' ||
80 *cur_byte
== '\n' || *cur_byte
== '\r')
81 next_byte(pb
, cur_byte
);
/*
 * Skip whitespace, then deal with the current byte in relation to c.
 *
 * The visible code returns ERR_CODE(*cur_byte) on one path and otherwise
 * advances to the next byte.
 *
 * NOTE(review): the test guarding the error return and the final return are
 * not present in this listing; presumably the error path is taken when
 * *cur_byte differs from c and success returns 0 -- confirm against the full
 * file.
 */
84 static int expect_byte(AVIOContext
*pb
, int *cur_byte
, uint8_t c
)
86 skip_spaces(pb
, cur_byte
);
88 return ERR_CODE(*cur_byte
);
89 next_byte(pb
, cur_byte
);
/*
 * Parse a double-quoted string from pb into bp.
 *
 * bp is initialised here.  After the opening quote, bytes are consumed until
 * a closing quote or a non-positive current byte is met.  A backslash
 * introduces an escape: for 'u' or 'U' the next four bytes must be hex digits
 * and the resulting code point is appended as UTF-8.  When full is non-zero
 * and the buffer is not complete, the result is AVERROR(ENOMEM).
 *
 * NOTE(review): several lines (the declaration of ret, checks on ret, the
 * branch structure around the two plain av_bprint_chars() calls, the failure
 * label and the return statements) are not present in this listing.
 * Presumably one plain append handles an escaped byte other than 'u' and the
 * other handles an unescaped byte; if so, escapes such as backslash-n are
 * stored as the letter itself -- confirm against the full file.
 */
93 static int parse_string(AVIOContext
*pb
, int *cur_byte
, AVBPrint
*bp
, int full
)
/* Third argument (maximum size): -1 for a full string, 1 otherwise. */
97 av_bprint_init(bp
, 0, full
? -1 : 1);
98 ret
= expect_byte(pb
, cur_byte
, '"');
/* Stop at the closing quote or at any value <= 0 (error code or NUL). */
101 while (*cur_byte
> 0 && *cur_byte
!= '"') {
102 if (*cur_byte
== '\\') {
103 next_byte(pb
, cur_byte
);
105 ret
= AVERROR_INVALIDDATA
;
/* Case-insensitive test for the 'u' escape. */
108 if ((*cur_byte
| 32) == 'u') {
/* Accumulate four hex digits into chr, most significant first. */
110 for (i
= 0; i
< 4; i
++) {
111 next_byte(pb
, cur_byte
);
112 if (!HEX_DIGIT_TEST(*cur_byte
)) {
113 ret
= ERR_CODE(*cur_byte
);
116 chr
= chr
* 16 + HEX_DIGIT_VAL(*cur_byte
);
118 av_bprint_utf8(bp
, chr
);
120 av_bprint_chars(bp
, *cur_byte
, 1);
123 av_bprint_chars(bp
, *cur_byte
, 1);
125 next_byte(pb
, cur_byte
);
/* The loop may also have ended on an error value; the closing quote is required. */
127 ret
= expect_byte(pb
, cur_byte
, '"');
/* A full string that did not fit in the buffer is reported as ENOMEM. */
130 if (full
&& !av_bprint_is_complete(bp
)) {
131 ret
= AVERROR(ENOMEM
);
137 av_bprint_finalize(bp
, NULL
);
/*
 * Parse an object key: a string, parsed with full = 0 into bp, followed by a
 * ':' separator.
 *
 * NOTE(review): the declaration of ret, the checks on it and the return
 * statements are not present in this listing.
 */
141 static int parse_label(AVIOContext
*pb
, int *cur_byte
, AVBPrint
*bp
)
145 ret
= parse_string(pb
, cur_byte
, bp
, 0);
148 ret
= expect_byte(pb
, cur_byte
, ':');
/*
 * Parse the literal "false" or "true" after optional whitespace.
 *
 * The two candidates are tried in the order of text[].  While walking a
 * candidate's characters the code can return AVERROR_INVALIDDATA; a letter
 * directly following the word is rejected as well, and so is input matching
 * neither candidate.
 *
 * NOTE(review): the declarations of p and i, the selection of the candidate,
 * the condition inside the character loop and the store into *result are not
 * present in this listing; presumably *result receives the index in text[]
 * (0 for "false", 1 for "true") -- confirm against the full file.
 */
154 static int parse_boolean(AVIOContext
*pb
, int *cur_byte
, int *result
)
156 static const char * const text
[] = { "false", "true" };
160 skip_spaces(pb
, cur_byte
);
161 for (i
= 0; i
< 2; i
++) {
/* Walk the candidate and the input in lock step. */
165 for (; *p
; p
++, next_byte(pb
, cur_byte
))
167 return AVERROR_INVALIDDATA
;
/* Reject a word that merely starts with the candidate (any ASCII letter follows). */
168 if (BETWEEN(*cur_byte
| 32, 'a', 'z'))
169 return AVERROR_INVALIDDATA
;
173 return AVERROR_INVALIDDATA
;
/*
 * Parse an unsigned decimal integer after optional whitespace.
 *
 * Returns AVERROR_INVALIDDATA when the first non-space value is not a digit;
 * otherwise consecutive digits are accumulated into val in base 10.
 *
 * NOTE(review): the declaration of val, the store into *result and the final
 * return are not present in this listing.  No overflow check is visible, so
 * a very long digit run would overflow val if it is a signed 64-bit type --
 * confirm against the full file.
 */
176 static int parse_int(AVIOContext
*pb
, int *cur_byte
, int64_t *result
)
180 skip_spaces(pb
, cur_byte
);
/* Unsigned arithmetic: any value outside '0'..'9', including errors, exceeds 9. */
181 if ((unsigned)*cur_byte
- '0' > 9)
182 return AVERROR_INVALIDDATA
;
183 while (BETWEEN(*cur_byte
, '0', '9')) {
184 val
= val
* 10 + (*cur_byte
- '0');
185 next_byte(pb
, cur_byte
);
/*
 * Parse a whole captions document from pb and queue one packet per caption
 * in subs.
 *
 * Layout, as far as the visible code shows:
 *   { "captions" : [ { label : value ... } ... ] }   followed by end of file
 * The recognised labels are "startOfParagraph" (boolean), "content" (full
 * string), "startTime" and "duration" (integers); another label leads to
 * AVERROR_INVALIDDATA.  A caption is rejected when content.size is zero or
 * when start or duration is still AV_NOPTS_VALUE.
 *
 * NOTE(review): the loop headers, most checks on ret, the separator tests
 * after skip_spaces(), the declaration of pkt, any other pkt field
 * assignments and the final return are not present in this listing.
 * NOTE(review): the two returns visible between the parsing of "content"
 * and av_bprint_finalize(&content, NULL) do not release content; this looks
 * like a leak on those error paths -- confirm against the full file.
 */
191 static int parse_file(AVIOContext
*pb
, FFDemuxSubtitlesQueue
*subs
)
193 int ret
, cur_byte
, start_of_par
;
194 AVBPrint label
, content
;
195 int64_t pos
, start
, duration
;
/* Prime the one-byte look-ahead held in cur_byte. */
198 next_byte(pb
, &cur_byte
);
199 ret
= expect_byte(pb
, &cur_byte
, '{');
201 return AVERROR_INVALIDDATA
;
/* The top-level object must start with the "captions" key. */
202 ret
= parse_label(pb
, &cur_byte
, &label
);
203 if (ret
< 0 || strcmp(label
.str
, "captions"))
204 return AVERROR_INVALIDDATA
;
205 ret
= expect_byte(pb
, &cur_byte
, '[');
207 return AVERROR_INVALIDDATA
;
/* Mark both timing fields as not yet seen for this caption. */
210 start
= duration
= AV_NOPTS_VALUE
;
211 ret
= expect_byte(pb
, &cur_byte
, '{');
/* One byte has been read ahead into cur_byte, hence the - 1. */
214 pos
= avio_tell(pb
) - 1;
216 ret
= parse_label(pb
, &cur_byte
, &label
);
/* Dispatch on the key name. */
219 if (!strcmp(label
.str
, "startOfParagraph")) {
220 ret
= parse_boolean(pb
, &cur_byte
, &start_of_par
);
223 } else if (!strcmp(label
.str
, "content")) {
224 ret
= parse_string(pb
, &cur_byte
, &content
, 1);
227 } else if (!strcmp(label
.str
, "startTime")) {
228 ret
= parse_int(pb
, &cur_byte
, &start
);
231 } else if (!strcmp(label
.str
, "duration")) {
232 ret
= parse_int(pb
, &cur_byte
, &duration
);
236 return AVERROR_INVALIDDATA
;
238 skip_spaces(pb
, &cur_byte
);
241 next_byte(pb
, &cur_byte
);
243 ret
= expect_byte(pb
, &cur_byte
, '}');
/* content, startTime and duration are all required. */
247 if (!content
.size
|| start
== AV_NOPTS_VALUE
||
248 duration
== AV_NOPTS_VALUE
)
249 return AVERROR_INVALIDDATA
;
250 pkt
= ff_subtitles_queue_insert(subs
, content
.str
, content
.len
, 0);
252 return AVERROR(ENOMEM
);
255 pkt
->duration
= duration
;
/* The text has been handed to the queue; release the parse buffer. */
256 av_bprint_finalize(&content
, NULL
);
258 skip_spaces(pb
, &cur_byte
);
261 next_byte(pb
, &cur_byte
);
/* Close the captions array, then the top-level object. */
263 ret
= expect_byte(pb
, &cur_byte
, ']');
266 ret
= expect_byte(pb
, &cur_byte
, '}');
/* Only whitespace may follow: anything but end of file is an error. */
269 skip_spaces(pb
, &cur_byte
);
270 if (cur_byte
!= AVERROR_EOF
)
271 return ERR_CODE(cur_byte
);
/*
 * read_header callback of the demuxer.
 *
 * Parses the whole input into tc->subs, finalizes the queue, adds the
 * start_time option to the pts of every queued subtitle, and creates one
 * subtitle stream (codec id AV_CODEC_ID_TEXT) whose duration is pts +
 * duration of the last queue entry.
 *
 * NOTE(review): the local declarations, the structure of the error path
 * around parse_file() (including the last av_log() argument and the return),
 * the check on st and the final return are not present in this listing.
 */
275 static av_cold
int tedcaptions_read_header(AVFormatContext
*avf
)
277 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
282 ret
= parse_file(avf
->pb
, &tc
->subs
);
/* Only syntax errors get a log message. */
284 if (ret
== AVERROR_INVALIDDATA
)
285 av_log(avf
, AV_LOG_ERROR
, "Syntax error near offset %"PRId64
".\n",
287 ff_subtitles_queue_clean(&tc
->subs
);
290 ff_subtitles_queue_finalize(&tc
->subs
);
/* Apply the user-configurable start offset to every subtitle. */
291 for (i
= 0; i
< tc
->subs
.nb_subs
; i
++)
292 tc
->subs
.subs
[i
].pts
+= tc
->start_time
;
/* NOTE(review): assumes nb_subs > 0 here -- confirm parse_file() guarantees it. */
294 last
= &tc
->subs
.subs
[tc
->subs
.nb_subs
- 1];
295 st
= avformat_new_stream(avf
, NULL
);
297 return AVERROR(ENOMEM
);
298 st
->codec
->codec_type
= AVMEDIA_TYPE_SUBTITLE
;
299 st
->codec
->codec_id
= AV_CODEC_ID_TEXT
;
/* Arguments 1 and 1000: presumably a 1/1000 time base, matching the ms option unit. */
300 avpriv_set_pts_info(st
, 64, 1, 1000);
301 st
->probe_packets
= 0;
303 st
->duration
= last
->pts
+ last
->duration
;
309 static int tedcaptions_read_packet(AVFormatContext
*avf
, AVPacket
*packet
)
311 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
313 return ff_subtitles_queue_read_packet(&tc
->subs
, packet
);
/*
 * read_close callback of the demuxer: cleans the subtitle queue kept in the
 * private context with ff_subtitles_queue_clean().
 *
 * NOTE(review): the return statement is not present in this listing.
 */
316 static int tedcaptions_read_close(AVFormatContext
*avf
)
318 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
320 ff_subtitles_queue_clean(&tc
->subs
);
/*
 * read_probe callback of the demuxer.
 *
 * The first non-whitespace byte of the probe buffer is compared with '{'.
 * Each of the five quoted key names in tags[] is then searched for with
 * strstr(); after a hit, t is moved past the tag and any whitespace.  The
 * score is AVPROBE_SCORE_MAX when count equals the number of tags,
 * AVPROBE_SCORE_EXTENSION when count is merely non-zero, and 0 otherwise.
 *
 * NOTE(review): the declaration of t, the statements executed when the '{'
 * test or strstr() fails, and the code that increments count are not present
 * in this listing; presumably a tag is counted only when a ':' follows it --
 * confirm against the full file.
 */
324 static av_cold
int tedcaptions_read_probe(AVProbeData
*p
)
326 static const char *const tags
[] = {
327 "\"captions\"", "\"duration\"", "\"content\"",
328 "\"startOfParagraph\"", "\"startTime\"",
330 unsigned i
, count
= 0;
/* Index of the first byte that is not a space, tab, CR or LF. */
333 if (p
->buf
[strspn(p
->buf
, " \t\r\n")] != '{')
335 for (i
= 0; i
< FF_ARRAY_ELEMS(tags
); i
++) {
336 if (!(t
= strstr(p
->buf
, tags
[i
])))
/* Step over the tag itself, then over whitespace after it. */
338 t
+= strlen(tags
[i
]);
339 t
+= strspn(t
, " \t\r\n");
343 return count
== FF_ARRAY_ELEMS(tags
) ? AVPROBE_SCORE_MAX
:
344 count
? AVPROBE_SCORE_EXTENSION
: 0;
/*
 * Seek callback of the demuxer (installed as .read_seek2): forwards the
 * request, with all its arguments unchanged, to ff_subtitles_queue_seek() on
 * the queue kept in the private context and returns its result.
 *
 * NOTE(review): the end of the parameter list, where flags is declared, is
 * not present in this listing.
 */
347 static int tedcaptions_read_seek(AVFormatContext
*avf
, int stream_index
,
348 int64_t min_ts
, int64_t ts
, int64_t max_ts
,
351 TEDCaptionsDemuxer
*tc
= avf
->priv_data
;
352 return ff_subtitles_queue_seek(&tc
->subs
, avf
, stream_index
,
353 min_ts
, ts
, max_ts
, flags
);
/*
 * Demuxer definition: format name "tedcaptions", a private context of
 * sizeof(TEDCaptionsDemuxer) bytes described by tedcaptions_demuxer_class,
 * and the header, packet, close, probe and seek callbacks defined in this
 * file.
 *
 * NOTE(review): the closing of this initializer is not present in this
 * listing.
 */
356 AVInputFormat ff_tedcaptions_demuxer
= {
357 .name
= "tedcaptions",
358 .long_name
= NULL_IF_CONFIG_SMALL("TED Talks captions"),
359 .priv_data_size
= sizeof(TEDCaptionsDemuxer
),
360 .priv_class
= &tedcaptions_demuxer_class
,
361 .read_header
= tedcaptions_read_header
,
362 .read_packet
= tedcaptions_read_packet
,
363 .read_close
= tedcaptions_read_close
,
364 .read_probe
= tedcaptions_read_probe
,
365 .read_seek2
= tedcaptions_read_seek
,