| 1 | /* |
| 2 | * Copyright (c) 2012-2013 Clément Bœsch <u pkh me> |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "avformat.h" |
| 22 | #include "subtitles.h" |
| 23 | #include "avio_internal.h" |
| 24 | #include "libavutil/avassert.h" |
| 25 | #include "libavutil/avstring.h" |
| 26 | |
| 27 | void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb) |
| 28 | { |
| 29 | int i; |
| 30 | r->pb = pb; |
| 31 | r->buf_pos = r->buf_len = 0; |
| 32 | r->type = FF_UTF_8; |
| 33 | for (i = 0; i < 2; i++) |
| 34 | r->buf[r->buf_len++] = avio_r8(r->pb); |
| 35 | if (strncmp("\xFF\xFE", r->buf, 2) == 0) { |
| 36 | r->type = FF_UTF16LE; |
| 37 | r->buf_pos += 2; |
| 38 | } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) { |
| 39 | r->type = FF_UTF16BE; |
| 40 | r->buf_pos += 2; |
| 41 | } else { |
| 42 | r->buf[r->buf_len++] = avio_r8(r->pb); |
| 43 | if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) { |
| 44 | // UTF8 |
| 45 | r->buf_pos += 3; |
| 46 | } |
| 47 | } |
| 48 | if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE)) |
| 49 | av_log(s, AV_LOG_INFO, |
| 50 | "UTF16 is automatically converted to UTF8, do not specify a character encoding\n"); |
| 51 | } |
| 52 | |
| 53 | void ff_text_init_buf(FFTextReader *r, void *buf, size_t size) |
| 54 | { |
| 55 | memset(&r->buf_pb, 0, sizeof(r->buf_pb)); |
| 56 | ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL); |
| 57 | ff_text_init_avio(NULL, r, &r->buf_pb); |
| 58 | } |
| 59 | |
| 60 | int64_t ff_text_pos(FFTextReader *r) |
| 61 | { |
| 62 | return avio_tell(r->pb) - r->buf_len + r->buf_pos; |
| 63 | } |
| 64 | |
| 65 | int ff_text_r8(FFTextReader *r) |
| 66 | { |
| 67 | uint32_t val; |
| 68 | uint8_t tmp; |
| 69 | if (r->buf_pos < r->buf_len) |
| 70 | return r->buf[r->buf_pos++]; |
| 71 | if (r->type == FF_UTF16LE) { |
| 72 | GET_UTF16(val, avio_rl16(r->pb), return 0;) |
| 73 | } else if (r->type == FF_UTF16BE) { |
| 74 | GET_UTF16(val, avio_rb16(r->pb), return 0;) |
| 75 | } else { |
| 76 | return avio_r8(r->pb); |
| 77 | } |
| 78 | if (!val) |
| 79 | return 0; |
| 80 | r->buf_pos = 0; |
| 81 | r->buf_len = 0; |
| 82 | PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;) |
| 83 | return r->buf[r->buf_pos++]; // buf_len is at least 1 |
| 84 | } |
| 85 | |
| 86 | void ff_text_read(FFTextReader *r, char *buf, size_t size) |
| 87 | { |
| 88 | for ( ; size > 0; size--) |
| 89 | *buf++ = ff_text_r8(r); |
| 90 | } |
| 91 | |
| 92 | int ff_text_eof(FFTextReader *r) |
| 93 | { |
| 94 | return r->buf_pos >= r->buf_len && avio_feof(r->pb); |
| 95 | } |
| 96 | |
| 97 | int ff_text_peek_r8(FFTextReader *r) |
| 98 | { |
| 99 | int c; |
| 100 | if (r->buf_pos < r->buf_len) |
| 101 | return r->buf[r->buf_pos]; |
| 102 | c = ff_text_r8(r); |
| 103 | if (!avio_feof(r->pb)) { |
| 104 | r->buf_pos = 0; |
| 105 | r->buf_len = 1; |
| 106 | r->buf[0] = c; |
| 107 | } |
| 108 | return c; |
| 109 | } |
| 110 | |
| 111 | AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, |
| 112 | const uint8_t *event, int len, int merge) |
| 113 | { |
| 114 | AVPacket *subs, *sub; |
| 115 | |
| 116 | if (merge && q->nb_subs > 0) { |
| 117 | /* merge with previous event */ |
| 118 | |
| 119 | int old_len; |
| 120 | sub = &q->subs[q->nb_subs - 1]; |
| 121 | old_len = sub->size; |
| 122 | if (av_grow_packet(sub, len) < 0) |
| 123 | return NULL; |
| 124 | memcpy(sub->data + old_len, event, len); |
| 125 | } else { |
| 126 | /* new event */ |
| 127 | |
| 128 | if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1) |
| 129 | return NULL; |
| 130 | subs = av_fast_realloc(q->subs, &q->allocated_size, |
| 131 | (q->nb_subs + 1) * sizeof(*q->subs)); |
| 132 | if (!subs) |
| 133 | return NULL; |
| 134 | q->subs = subs; |
| 135 | sub = &subs[q->nb_subs++]; |
| 136 | if (av_new_packet(sub, len) < 0) |
| 137 | return NULL; |
| 138 | sub->flags |= AV_PKT_FLAG_KEY; |
| 139 | sub->pts = sub->dts = 0; |
| 140 | memcpy(sub->data, event, len); |
| 141 | } |
| 142 | return sub; |
| 143 | } |
| 144 | |
| 145 | static int cmp_pkt_sub_ts_pos(const void *a, const void *b) |
| 146 | { |
| 147 | const AVPacket *s1 = a; |
| 148 | const AVPacket *s2 = b; |
| 149 | if (s1->pts == s2->pts) { |
| 150 | if (s1->pos == s2->pos) |
| 151 | return 0; |
| 152 | return s1->pos > s2->pos ? 1 : -1; |
| 153 | } |
| 154 | return s1->pts > s2->pts ? 1 : -1; |
| 155 | } |
| 156 | |
| 157 | static int cmp_pkt_sub_pos_ts(const void *a, const void *b) |
| 158 | { |
| 159 | const AVPacket *s1 = a; |
| 160 | const AVPacket *s2 = b; |
| 161 | if (s1->pos == s2->pos) { |
| 162 | if (s1->pts == s2->pts) |
| 163 | return 0; |
| 164 | return s1->pts > s2->pts ? 1 : -1; |
| 165 | } |
| 166 | return s1->pos > s2->pos ? 1 : -1; |
| 167 | } |
| 168 | |
| 169 | void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q) |
| 170 | { |
| 171 | int i; |
| 172 | |
| 173 | qsort(q->subs, q->nb_subs, sizeof(*q->subs), |
| 174 | q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos |
| 175 | : cmp_pkt_sub_pos_ts); |
| 176 | for (i = 0; i < q->nb_subs; i++) |
| 177 | if (q->subs[i].duration == -1 && i < q->nb_subs - 1) |
| 178 | q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts; |
| 179 | } |
| 180 | |
| 181 | int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt) |
| 182 | { |
| 183 | AVPacket *sub = q->subs + q->current_sub_idx; |
| 184 | |
| 185 | if (q->current_sub_idx == q->nb_subs) |
| 186 | return AVERROR_EOF; |
| 187 | if (av_copy_packet(pkt, sub) < 0) { |
| 188 | return AVERROR(ENOMEM); |
| 189 | } |
| 190 | |
| 191 | pkt->dts = pkt->pts; |
| 192 | q->current_sub_idx++; |
| 193 | return 0; |
| 194 | } |
| 195 | |
| 196 | static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts) |
| 197 | { |
| 198 | int s1 = 0, s2 = q->nb_subs - 1; |
| 199 | |
| 200 | if (s2 < s1) |
| 201 | return AVERROR(ERANGE); |
| 202 | |
| 203 | for (;;) { |
| 204 | int mid; |
| 205 | |
| 206 | if (s1 == s2) |
| 207 | return s1; |
| 208 | if (s1 == s2 - 1) |
| 209 | return q->subs[s1].pts <= q->subs[s2].pts ? s1 : s2; |
| 210 | mid = (s1 + s2) / 2; |
| 211 | if (q->subs[mid].pts <= ts) |
| 212 | s1 = mid; |
| 213 | else |
| 214 | s2 = mid; |
| 215 | } |
| 216 | } |
| 217 | |
/**
 * Move the queue's read position according to a seek request.
 *
 * - AVSEEK_FLAG_BYTE is rejected with AVERROR(ENOSYS).
 * - With AVSEEK_FLAG_FRAME, ts is taken directly as the index of the packet
 *   to read next and must lie within [0, nb_subs).
 * - Otherwise the seek is timestamp based: a starting index is obtained from
 *   search_sub_ts(), adjusted to respect min_ts/max_ts, then moved back over
 *   earlier packets that still overlap the selected timestamp.
 *
 * @param q            queue to seek in
 * @param s            format context; not used by this function
 * @param stream_index stream to consider, or -1 for any stream
 * @param min_ts       smallest acceptable timestamp
 * @param ts           target timestamp (or packet index with
 *                     AVSEEK_FLAG_FRAME)
 * @param max_ts       largest acceptable timestamp
 * @param flags        AVSEEK_FLAG_* bits
 * @return 0 on success, a negative AVERROR code otherwise
 */
int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
                            int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
{
    if (flags & AVSEEK_FLAG_BYTE) {
        return AVERROR(ENOSYS);
    } else if (flags & AVSEEK_FLAG_FRAME) {
        /* ts is a packet index here */
        if (ts < 0 || ts >= q->nb_subs)
            return AVERROR(ERANGE);
        q->current_sub_idx = ts;
    } else {
        int i, idx = search_sub_ts(q, ts);
        int64_t ts_selected;

        /* negative means search_sub_ts() failed (empty queue) */
        if (idx < 0)
            return idx;
        /* walk forward over packets that are too early; idx only follows
         * packets of the requested stream */
        for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++)
            if (stream_index == -1 || q->subs[i].stream_index == stream_index)
                idx = i;
        /* walk backward over packets that are too late, same stream rule */
        for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--)
            if (stream_index == -1 || q->subs[i].stream_index == stream_index)
                idx = i;

        /* the candidate must fall inside the accepted window */
        ts_selected = q->subs[idx].pts;
        if (ts_selected < min_ts || ts_selected > max_ts)
            return AVERROR(ERANGE);

        /* look back in the latest subtitles for overlapping subtitles */
        for (i = idx - 1; i >= 0; i--) {
            int64_t pts = q->subs[i].pts;
            /* skip packets without a positive duration or from another
             * stream; they neither extend nor stop the look-back */
            if (q->subs[i].duration <= 0 ||
                (stream_index != -1 && q->subs[i].stream_index != stream_index))
                continue;
            /* packet starts within the window and is still active at
             * ts_selected: start from it instead */
            if (pts >= min_ts && pts > ts_selected - q->subs[i].duration)
                idx = i;
            else
                break;
        }

        /* If the queue is used to store multiple subtitles streams (like with
         * VobSub) and the stream index is not specified, we need to make sure
         * to focus on the smallest file position offset for a same timestamp;
         * queue is ordered by pts and then filepos, so we can take the first
         * entry for a given timestamp. */
        if (stream_index == -1)
            while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts)
                idx--;

        q->current_sub_idx = idx;
    }
    return 0;
}
| 269 | |
| 270 | void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q) |
| 271 | { |
| 272 | int i; |
| 273 | |
| 274 | for (i = 0; i < q->nb_subs; i++) |
| 275 | av_free_packet(&q->subs[i]); |
| 276 | av_freep(&q->subs); |
| 277 | q->nb_subs = q->allocated_size = q->current_sub_idx = 0; |
| 278 | } |
| 279 | |
| 280 | int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c) |
| 281 | { |
| 282 | int i = 0; |
| 283 | char end_chr; |
| 284 | |
| 285 | if (!*c) // cached char? |
| 286 | *c = ff_text_r8(tr); |
| 287 | if (!*c) |
| 288 | return 0; |
| 289 | |
| 290 | end_chr = *c == '<' ? '>' : '<'; |
| 291 | do { |
| 292 | av_bprint_chars(buf, *c, 1); |
| 293 | *c = ff_text_r8(tr); |
| 294 | i++; |
| 295 | } while (*c != end_chr && *c); |
| 296 | if (end_chr == '>') { |
| 297 | av_bprint_chars(buf, '>', 1); |
| 298 | *c = 0; |
| 299 | } |
| 300 | return i; |
| 301 | } |
| 302 | |
/**
 * Locate the value of an attribute inside a tag string.
 *
 * The string is scanned token by token, where tokens are separated by
 * whitespace outside of double quotes. The first token is skipped; each
 * following one is compared case-insensitively against attr followed
 * by '='.
 *
 * @param s    tag text to search
 * @param attr attribute name to look for
 * @return pointer to the first character of the value (just past an opening
 *         double quote, if there is one), or NULL when not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const int attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* skip the current token up to unquoted whitespace */
        for (; *s; s++) {
            if (!quoted && av_isspace(*s))
                break;
            if (*s == '"') // XXX: support escaping?
                quoted = !quoted;
        }

        /* skip the separator */
        while (av_isspace(*s))
            s++;

        /* the name must match before s[attr_len] is looked at */
        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=') {
            const char *value = s + attr_len + 1;

            if (*value == '"')
                value++;
            return value;
        }
    }
    return NULL;
}
| 322 | |
/** Return 1 if c is a carriage return or a line feed, 0 otherwise. */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
| 327 | |
| 328 | void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf) |
| 329 | { |
| 330 | char eol_buf[5], last_was_cr = 0; |
| 331 | int n = 0, i = 0, nb_eol = 0; |
| 332 | |
| 333 | av_bprint_clear(buf); |
| 334 | |
| 335 | for (;;) { |
| 336 | char c = ff_text_r8(tr); |
| 337 | |
| 338 | if (!c) |
| 339 | break; |
| 340 | |
| 341 | /* ignore all initial line breaks */ |
| 342 | if (n == 0 && is_eol(c)) |
| 343 | continue; |
| 344 | |
| 345 | /* line break buffering: we don't want to add the trailing \r\n */ |
| 346 | if (is_eol(c)) { |
| 347 | nb_eol += c == '\n' || last_was_cr; |
| 348 | if (nb_eol == 2) |
| 349 | break; |
| 350 | eol_buf[i++] = c; |
| 351 | if (i == sizeof(eol_buf) - 1) |
| 352 | break; |
| 353 | last_was_cr = c == '\r'; |
| 354 | continue; |
| 355 | } |
| 356 | |
| 357 | /* only one line break followed by data: we flush the line breaks |
| 358 | * buffer */ |
| 359 | if (i) { |
| 360 | eol_buf[i] = 0; |
| 361 | av_bprintf(buf, "%s", eol_buf); |
| 362 | i = nb_eol = 0; |
| 363 | } |
| 364 | |
| 365 | av_bprint_chars(buf, c, 1); |
| 366 | n++; |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) |
| 371 | { |
| 372 | FFTextReader tr; |
| 373 | tr.buf_pos = tr.buf_len = 0; |
| 374 | tr.type = 0; |
| 375 | tr.pb = pb; |
| 376 | ff_subtitles_read_text_chunk(&tr, buf); |
| 377 | } |
| 378 | |
| 379 | ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size) |
| 380 | { |
| 381 | size_t cur = 0; |
| 382 | if (!size) |
| 383 | return 0; |
| 384 | while (cur + 1 < size) { |
| 385 | unsigned char c = ff_text_r8(tr); |
| 386 | if (!c) |
| 387 | return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA; |
| 388 | if (c == '\r' || c == '\n') |
| 389 | break; |
| 390 | buf[cur++] = c; |
| 391 | buf[cur] = '\0'; |
| 392 | } |
| 393 | if (ff_text_peek_r8(tr) == '\r') |
| 394 | ff_text_r8(tr); |
| 395 | if (ff_text_peek_r8(tr) == '\n') |
| 396 | ff_text_r8(tr); |
| 397 | return cur; |
| 398 | } |