Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2012-2013 Clément Bœsch <u pkh me> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "avformat.h" | |
22 | #include "subtitles.h" | |
23 | #include "avio_internal.h" | |
24 | #include "libavutil/avassert.h" | |
25 | #include "libavutil/avstring.h" | |
26 | ||
f6fa7814 | 27 | void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb) |
2ba45a60 DM |
28 | { |
29 | int i; | |
30 | r->pb = pb; | |
31 | r->buf_pos = r->buf_len = 0; | |
32 | r->type = FF_UTF_8; | |
33 | for (i = 0; i < 2; i++) | |
34 | r->buf[r->buf_len++] = avio_r8(r->pb); | |
35 | if (strncmp("\xFF\xFE", r->buf, 2) == 0) { | |
36 | r->type = FF_UTF16LE; | |
37 | r->buf_pos += 2; | |
38 | } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) { | |
39 | r->type = FF_UTF16BE; | |
40 | r->buf_pos += 2; | |
41 | } else { | |
42 | r->buf[r->buf_len++] = avio_r8(r->pb); | |
43 | if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) { | |
44 | // UTF8 | |
45 | r->buf_pos += 3; | |
46 | } | |
47 | } | |
f6fa7814 DM |
48 | if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE)) |
49 | av_log(s, AV_LOG_INFO, | |
50 | "UTF16 is automatically converted to UTF8, do not specify a character encoding\n"); | |
2ba45a60 DM |
51 | } |
52 | ||
53 | void ff_text_init_buf(FFTextReader *r, void *buf, size_t size) | |
54 | { | |
55 | memset(&r->buf_pb, 0, sizeof(r->buf_pb)); | |
56 | ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL); | |
f6fa7814 | 57 | ff_text_init_avio(NULL, r, &r->buf_pb); |
2ba45a60 DM |
58 | } |
59 | ||
60 | int64_t ff_text_pos(FFTextReader *r) | |
61 | { | |
62 | return avio_tell(r->pb) - r->buf_len + r->buf_pos; | |
63 | } | |
64 | ||
65 | int ff_text_r8(FFTextReader *r) | |
66 | { | |
67 | uint32_t val; | |
68 | uint8_t tmp; | |
69 | if (r->buf_pos < r->buf_len) | |
70 | return r->buf[r->buf_pos++]; | |
71 | if (r->type == FF_UTF16LE) { | |
72 | GET_UTF16(val, avio_rl16(r->pb), return 0;) | |
73 | } else if (r->type == FF_UTF16BE) { | |
74 | GET_UTF16(val, avio_rb16(r->pb), return 0;) | |
75 | } else { | |
76 | return avio_r8(r->pb); | |
77 | } | |
78 | if (!val) | |
79 | return 0; | |
80 | r->buf_pos = 0; | |
81 | r->buf_len = 0; | |
82 | PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;) | |
83 | return r->buf[r->buf_pos++]; // buf_len is at least 1 | |
84 | } | |
85 | ||
86 | void ff_text_read(FFTextReader *r, char *buf, size_t size) | |
87 | { | |
88 | for ( ; size > 0; size--) | |
89 | *buf++ = ff_text_r8(r); | |
90 | } | |
91 | ||
92 | int ff_text_eof(FFTextReader *r) | |
93 | { | |
94 | return r->buf_pos >= r->buf_len && avio_feof(r->pb); | |
95 | } | |
96 | ||
97 | int ff_text_peek_r8(FFTextReader *r) | |
98 | { | |
99 | int c; | |
100 | if (r->buf_pos < r->buf_len) | |
101 | return r->buf[r->buf_pos]; | |
102 | c = ff_text_r8(r); | |
103 | if (!avio_feof(r->pb)) { | |
104 | r->buf_pos = 0; | |
105 | r->buf_len = 1; | |
106 | r->buf[0] = c; | |
107 | } | |
108 | return c; | |
109 | } | |
110 | ||
111 | AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, | |
112 | const uint8_t *event, int len, int merge) | |
113 | { | |
114 | AVPacket *subs, *sub; | |
115 | ||
116 | if (merge && q->nb_subs > 0) { | |
117 | /* merge with previous event */ | |
118 | ||
119 | int old_len; | |
120 | sub = &q->subs[q->nb_subs - 1]; | |
121 | old_len = sub->size; | |
122 | if (av_grow_packet(sub, len) < 0) | |
123 | return NULL; | |
124 | memcpy(sub->data + old_len, event, len); | |
125 | } else { | |
126 | /* new event */ | |
127 | ||
128 | if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1) | |
129 | return NULL; | |
130 | subs = av_fast_realloc(q->subs, &q->allocated_size, | |
131 | (q->nb_subs + 1) * sizeof(*q->subs)); | |
132 | if (!subs) | |
133 | return NULL; | |
134 | q->subs = subs; | |
135 | sub = &subs[q->nb_subs++]; | |
136 | if (av_new_packet(sub, len) < 0) | |
137 | return NULL; | |
138 | sub->flags |= AV_PKT_FLAG_KEY; | |
139 | sub->pts = sub->dts = 0; | |
140 | memcpy(sub->data, event, len); | |
141 | } | |
142 | return sub; | |
143 | } | |
144 | ||
145 | static int cmp_pkt_sub_ts_pos(const void *a, const void *b) | |
146 | { | |
147 | const AVPacket *s1 = a; | |
148 | const AVPacket *s2 = b; | |
149 | if (s1->pts == s2->pts) { | |
150 | if (s1->pos == s2->pos) | |
151 | return 0; | |
152 | return s1->pos > s2->pos ? 1 : -1; | |
153 | } | |
154 | return s1->pts > s2->pts ? 1 : -1; | |
155 | } | |
156 | ||
157 | static int cmp_pkt_sub_pos_ts(const void *a, const void *b) | |
158 | { | |
159 | const AVPacket *s1 = a; | |
160 | const AVPacket *s2 = b; | |
161 | if (s1->pos == s2->pos) { | |
162 | if (s1->pts == s2->pts) | |
163 | return 0; | |
164 | return s1->pts > s2->pts ? 1 : -1; | |
165 | } | |
166 | return s1->pos > s2->pos ? 1 : -1; | |
167 | } | |
168 | ||
169 | void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q) | |
170 | { | |
171 | int i; | |
172 | ||
173 | qsort(q->subs, q->nb_subs, sizeof(*q->subs), | |
174 | q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos | |
175 | : cmp_pkt_sub_pos_ts); | |
176 | for (i = 0; i < q->nb_subs; i++) | |
177 | if (q->subs[i].duration == -1 && i < q->nb_subs - 1) | |
178 | q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts; | |
179 | } | |
180 | ||
181 | int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt) | |
182 | { | |
183 | AVPacket *sub = q->subs + q->current_sub_idx; | |
184 | ||
185 | if (q->current_sub_idx == q->nb_subs) | |
186 | return AVERROR_EOF; | |
187 | if (av_copy_packet(pkt, sub) < 0) { | |
188 | return AVERROR(ENOMEM); | |
189 | } | |
190 | ||
191 | pkt->dts = pkt->pts; | |
192 | q->current_sub_idx++; | |
193 | return 0; | |
194 | } | |
195 | ||
196 | static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts) | |
197 | { | |
198 | int s1 = 0, s2 = q->nb_subs - 1; | |
199 | ||
200 | if (s2 < s1) | |
201 | return AVERROR(ERANGE); | |
202 | ||
203 | for (;;) { | |
204 | int mid; | |
205 | ||
206 | if (s1 == s2) | |
207 | return s1; | |
208 | if (s1 == s2 - 1) | |
209 | return q->subs[s1].pts <= q->subs[s2].pts ? s1 : s2; | |
210 | mid = (s1 + s2) / 2; | |
211 | if (q->subs[mid].pts <= ts) | |
212 | s1 = mid; | |
213 | else | |
214 | s2 = mid; | |
215 | } | |
216 | } | |
217 | ||
218 | int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, | |
219 | int64_t min_ts, int64_t ts, int64_t max_ts, int flags) | |
220 | { | |
221 | if (flags & AVSEEK_FLAG_BYTE) { | |
222 | return AVERROR(ENOSYS); | |
223 | } else if (flags & AVSEEK_FLAG_FRAME) { | |
224 | if (ts < 0 || ts >= q->nb_subs) | |
225 | return AVERROR(ERANGE); | |
226 | q->current_sub_idx = ts; | |
227 | } else { | |
228 | int i, idx = search_sub_ts(q, ts); | |
229 | int64_t ts_selected; | |
230 | ||
231 | if (idx < 0) | |
232 | return idx; | |
233 | for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++) | |
234 | if (stream_index == -1 || q->subs[i].stream_index == stream_index) | |
235 | idx = i; | |
236 | for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--) | |
237 | if (stream_index == -1 || q->subs[i].stream_index == stream_index) | |
238 | idx = i; | |
239 | ||
240 | ts_selected = q->subs[idx].pts; | |
241 | if (ts_selected < min_ts || ts_selected > max_ts) | |
242 | return AVERROR(ERANGE); | |
243 | ||
244 | /* look back in the latest subtitles for overlapping subtitles */ | |
245 | for (i = idx - 1; i >= 0; i--) { | |
246 | int64_t pts = q->subs[i].pts; | |
247 | if (q->subs[i].duration <= 0 || | |
248 | (stream_index != -1 && q->subs[i].stream_index != stream_index)) | |
249 | continue; | |
250 | if (pts >= min_ts && pts > ts_selected - q->subs[i].duration) | |
251 | idx = i; | |
252 | else | |
253 | break; | |
254 | } | |
255 | ||
256 | /* If the queue is used to store multiple subtitles streams (like with | |
257 | * VobSub) and the stream index is not specified, we need to make sure | |
258 | * to focus on the smallest file position offset for a same timestamp; | |
259 | * queue is ordered by pts and then filepos, so we can take the first | |
260 | * entry for a given timestamp. */ | |
261 | if (stream_index == -1) | |
262 | while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts) | |
263 | idx--; | |
264 | ||
265 | q->current_sub_idx = idx; | |
266 | } | |
267 | return 0; | |
268 | } | |
269 | ||
270 | void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q) | |
271 | { | |
272 | int i; | |
273 | ||
274 | for (i = 0; i < q->nb_subs; i++) | |
275 | av_free_packet(&q->subs[i]); | |
276 | av_freep(&q->subs); | |
277 | q->nb_subs = q->allocated_size = q->current_sub_idx = 0; | |
278 | } | |
279 | ||
280 | int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c) | |
281 | { | |
282 | int i = 0; | |
283 | char end_chr; | |
284 | ||
285 | if (!*c) // cached char? | |
286 | *c = ff_text_r8(tr); | |
287 | if (!*c) | |
288 | return 0; | |
289 | ||
290 | end_chr = *c == '<' ? '>' : '<'; | |
291 | do { | |
292 | av_bprint_chars(buf, *c, 1); | |
293 | *c = ff_text_r8(tr); | |
294 | i++; | |
295 | } while (*c != end_chr && *c); | |
296 | if (end_chr == '>') { | |
297 | av_bprint_chars(buf, '>', 1); | |
298 | *c = 0; | |
299 | } | |
300 | return i; | |
301 | } | |
302 | ||
/**
 * Locate the value of attribute attr inside the tag text s.
 *
 * The string is walked token by token; whitespace inside double quotes does
 * not separate tokens. The attribute name is matched case-insensitively and
 * must be followed directly by '='.
 *
 * @return pointer to the first character of the value (just past an opening
 *         double quote if there is one), or NULL when attr is not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const int attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* skip the current token */
        for (; *s && (quoted || !av_isspace(*s)); s++)
            quoted ^= *s == '"'; // XXX: support escaping?

        /* skip the whitespace separating it from the next token */
        while (av_isspace(*s))
            s++;

        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=') {
            const char *value = s + attr_len + 1;

            return *value == '"' ? value + 1 : value;
        }
    }
    return NULL;
}
322 | ||
/** Return 1 if c is a carriage return or a line feed, 0 otherwise. */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
327 | ||
328 | void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf) | |
329 | { | |
330 | char eol_buf[5], last_was_cr = 0; | |
331 | int n = 0, i = 0, nb_eol = 0; | |
332 | ||
333 | av_bprint_clear(buf); | |
334 | ||
335 | for (;;) { | |
336 | char c = ff_text_r8(tr); | |
337 | ||
338 | if (!c) | |
339 | break; | |
340 | ||
341 | /* ignore all initial line breaks */ | |
342 | if (n == 0 && is_eol(c)) | |
343 | continue; | |
344 | ||
345 | /* line break buffering: we don't want to add the trailing \r\n */ | |
346 | if (is_eol(c)) { | |
347 | nb_eol += c == '\n' || last_was_cr; | |
348 | if (nb_eol == 2) | |
349 | break; | |
350 | eol_buf[i++] = c; | |
351 | if (i == sizeof(eol_buf) - 1) | |
352 | break; | |
353 | last_was_cr = c == '\r'; | |
354 | continue; | |
355 | } | |
356 | ||
357 | /* only one line break followed by data: we flush the line breaks | |
358 | * buffer */ | |
359 | if (i) { | |
360 | eol_buf[i] = 0; | |
361 | av_bprintf(buf, "%s", eol_buf); | |
362 | i = nb_eol = 0; | |
363 | } | |
364 | ||
365 | av_bprint_chars(buf, c, 1); | |
366 | n++; | |
367 | } | |
368 | } | |
369 | ||
370 | void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) | |
371 | { | |
372 | FFTextReader tr; | |
373 | tr.buf_pos = tr.buf_len = 0; | |
374 | tr.type = 0; | |
375 | tr.pb = pb; | |
376 | ff_subtitles_read_text_chunk(&tr, buf); | |
377 | } | |
378 | ||
379 | ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size) | |
380 | { | |
381 | size_t cur = 0; | |
382 | if (!size) | |
383 | return 0; | |
384 | while (cur + 1 < size) { | |
385 | unsigned char c = ff_text_r8(tr); | |
386 | if (!c) | |
387 | return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA; | |
388 | if (c == '\r' || c == '\n') | |
389 | break; | |
390 | buf[cur++] = c; | |
391 | buf[cur] = '\0'; | |
392 | } | |
393 | if (ff_text_peek_r8(tr) == '\r') | |
394 | ff_text_r8(tr); | |
395 | if (ff_text_peek_r8(tr) == '\n') | |
396 | ff_text_r8(tr); | |
397 | return cur; | |
398 | } |