| 1 | /* |
| 2 | * SubRip subtitle decoder |
| 3 | * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org> |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #include "libavutil/avstring.h" |
| 23 | #include "libavutil/common.h" |
| 24 | #include "libavutil/intreadwrite.h" |
| 25 | #include "libavutil/parseutils.h" |
| 26 | #include "avcodec.h" |
| 27 | #include "ass.h" |
| 28 | |
| 29 | static int html_color_parse(AVCodecContext *avctx, const char *str) |
| 30 | { |
| 31 | uint8_t rgba[4]; |
| 32 | if (av_parse_color(rgba, str, strcspn(str, "\" >"), avctx) < 0) |
| 33 | return -1; |
| 34 | return rgba[0] | rgba[1] << 8 | rgba[2] << 16; |
| 35 | } |
| 36 | |
/* Slots of SrtStack.param[]; PARAM_NUMBER is the number of slots. */
enum {
    PARAM_UNKNOWN = -1,
    PARAM_SIZE    =  0,
    PARAM_COLOR   =  1,
    PARAM_FACE    =  2,
    PARAM_NUMBER  =  3
};
| 44 | |
| 45 | typedef struct { |
| 46 | char tag[128]; |
| 47 | char param[PARAM_NUMBER][128]; |
| 48 | } SrtStack; |
| 49 | |
| 50 | static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end, |
| 51 | const char *in, int x1, int y1, int x2, int y2) |
| 52 | { |
| 53 | char *param, buffer[128], tmp[128]; |
| 54 | int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0; |
| 55 | SrtStack stack[16]; |
| 56 | |
| 57 | stack[0].tag[0] = 0; |
| 58 | strcpy(stack[0].param[PARAM_SIZE], "{\\fs}"); |
| 59 | strcpy(stack[0].param[PARAM_COLOR], "{\\c}"); |
| 60 | strcpy(stack[0].param[PARAM_FACE], "{\\fn}"); |
| 61 | |
| 62 | if (x1 >= 0 && y1 >= 0) { |
| 63 | if (x2 >= 0 && y2 >= 0 && (x2 != x1 || y2 != y1)) |
| 64 | snprintf(out, out_end-out, |
| 65 | "{\\an1}{\\move(%d,%d,%d,%d)}", x1, y1, x2, y2); |
| 66 | else |
| 67 | snprintf(out, out_end-out, "{\\an1}{\\pos(%d,%d)}", x1, y1); |
| 68 | out += strlen(out); |
| 69 | } |
| 70 | |
| 71 | for (; out < out_end && !end && *in; in++) { |
| 72 | switch (*in) { |
| 73 | case '\r': |
| 74 | break; |
| 75 | case '\n': |
| 76 | if (line_start) { |
| 77 | end = 1; |
| 78 | break; |
| 79 | } |
| 80 | while (out[-1] == ' ') |
| 81 | out--; |
| 82 | snprintf(out, out_end-out, "\\N"); |
| 83 | if(out<out_end) out += strlen(out); |
| 84 | line_start = 1; |
| 85 | break; |
| 86 | case ' ': |
| 87 | if (!line_start) |
| 88 | *out++ = *in; |
| 89 | break; |
| 90 | case '{': /* skip all {\xxx} substrings except for {\an%d} |
| 91 | and all microdvd like styles such as {Y:xxx} */ |
| 92 | len = 0; |
| 93 | an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0; |
| 94 | if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) || |
| 95 | (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) { |
| 96 | in += len - 1; |
| 97 | } else |
| 98 | *out++ = *in; |
| 99 | break; |
| 100 | case '<': |
| 101 | tag_close = in[1] == '/'; |
| 102 | len = 0; |
| 103 | if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) { |
| 104 | if ((param = strchr(buffer, ' '))) |
| 105 | *param++ = 0; |
| 106 | if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) || |
| 107 | ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, buffer))) { |
| 108 | int i, j, unknown = 0; |
| 109 | in += len + tag_close; |
| 110 | if (!tag_close) |
| 111 | memset(stack+sptr, 0, sizeof(*stack)); |
| 112 | if (!strcmp(buffer, "font")) { |
| 113 | if (tag_close) { |
| 114 | for (i=PARAM_NUMBER-1; i>=0; i--) |
| 115 | if (stack[sptr-1].param[i][0]) |
| 116 | for (j=sptr-2; j>=0; j--) |
| 117 | if (stack[j].param[i][0]) { |
| 118 | snprintf(out, out_end-out, |
| 119 | "%s", stack[j].param[i]); |
| 120 | if(out<out_end) out += strlen(out); |
| 121 | break; |
| 122 | } |
| 123 | } else { |
| 124 | while (param) { |
| 125 | if (!strncmp(param, "size=", 5)) { |
| 126 | unsigned font_size; |
| 127 | param += 5 + (param[5] == '"'); |
| 128 | if (sscanf(param, "%u", &font_size) == 1) { |
| 129 | snprintf(stack[sptr].param[PARAM_SIZE], |
| 130 | sizeof(stack[0].param[PARAM_SIZE]), |
| 131 | "{\\fs%u}", font_size); |
| 132 | } |
| 133 | } else if (!strncmp(param, "color=", 6)) { |
| 134 | param += 6 + (param[6] == '"'); |
| 135 | snprintf(stack[sptr].param[PARAM_COLOR], |
| 136 | sizeof(stack[0].param[PARAM_COLOR]), |
| 137 | "{\\c&H%X&}", |
| 138 | html_color_parse(avctx, param)); |
| 139 | } else if (!strncmp(param, "face=", 5)) { |
| 140 | param += 5 + (param[5] == '"'); |
| 141 | len = strcspn(param, |
| 142 | param[-1] == '"' ? "\"" :" "); |
| 143 | av_strlcpy(tmp, param, |
| 144 | FFMIN(sizeof(tmp), len+1)); |
| 145 | param += len; |
| 146 | snprintf(stack[sptr].param[PARAM_FACE], |
| 147 | sizeof(stack[0].param[PARAM_FACE]), |
| 148 | "{\\fn%s}", tmp); |
| 149 | } |
| 150 | if ((param = strchr(param, ' '))) |
| 151 | param++; |
| 152 | } |
| 153 | for (i=0; i<PARAM_NUMBER; i++) |
| 154 | if (stack[sptr].param[i][0]) { |
| 155 | snprintf(out, out_end-out, |
| 156 | "%s", stack[sptr].param[i]); |
| 157 | if(out<out_end) out += strlen(out); |
| 158 | } |
| 159 | } |
| 160 | } else if (!buffer[1] && strspn(buffer, "bisu") == 1) { |
| 161 | snprintf(out, out_end-out, |
| 162 | "{\\%c%d}", buffer[0], !tag_close); |
| 163 | if(out<out_end) out += strlen(out); |
| 164 | } else { |
| 165 | unknown = 1; |
| 166 | snprintf(tmp, sizeof(tmp), "</%s>", buffer); |
| 167 | } |
| 168 | if (tag_close) { |
| 169 | sptr--; |
| 170 | } else if (unknown && !strstr(in, tmp)) { |
| 171 | in -= len + tag_close; |
| 172 | *out++ = *in; |
| 173 | } else |
| 174 | av_strlcpy(stack[sptr++].tag, buffer, |
| 175 | sizeof(stack[0].tag)); |
| 176 | break; |
| 177 | } |
| 178 | } |
| 179 | default: |
| 180 | *out++ = *in; |
| 181 | break; |
| 182 | } |
| 183 | if (*in != ' ' && *in != '\r' && *in != '\n') |
| 184 | line_start = 0; |
| 185 | } |
| 186 | |
| 187 | out = FFMIN(out, out_end-3); |
| 188 | while (!strncmp(out-2, "\\N", 2)) |
| 189 | out -= 2; |
| 190 | while (out[-1] == ' ') |
| 191 | out--; |
| 192 | snprintf(out, out_end-out, "\r\n"); |
| 193 | return in; |
| 194 | } |
| 195 | |
/**
 * Parse a SubRip timing line of the form
 * "H:MM:SS,mmm --> H:MM:SS,mmm" optionally followed by
 * " X1:n X2:n Y1:n Y2:n".
 *
 * The timing line is looked for in the first and, failing that, in the
 * second line of buf (the first line may be the event counter).
 *
 * @param buf      NUL-terminated input
 * @param ts_start receives the start time in 1/100 s (milliseconds are
 *                 truncated); clobbered even on failure
 * @param ts_end   receives the end time in 1/100 s; clobbered even on failure
 * @param x1,y1,x2,y2 receive the coordinates that are present on the timing
 *                 line; left untouched otherwise
 * @return pointer to the start of the line following the timing line,
 *         or NULL if neither of the first two lines holds valid timestamps
 */
static const char *read_ts(const char *buf, int *ts_start, int *ts_end,
                           int *x1, int *y1, int *x2, int *y2)
{
    int i, hs, ms, ss, he, me, se;

    for (i=0; i<2; i++) {
        /* try to read timestamps in either the first or second line;
           the coordinates are stored through int pointers, so they are
           converted with %d (a %u conversion requires an unsigned *) */
        int c = sscanf(buf, "%d:%2d:%2d%*1[,.]%3d --> %d:%2d:%2d%*1[,.]%3d"
                       "%*[ ]X1:%d X2:%d Y1:%d Y2:%d",
                       &hs, &ms, &ss, ts_start, &he, &me, &se, ts_end,
                       x1, x2, y1, y2);
        /* advance to the start of the next line, if there is one */
        buf += strcspn(buf, "\n");
        buf += !!*buf;
        /* 8 conversions = both timestamps; the coordinates are optional */
        if (c >= 8) {
            *ts_start = 100*(ss + 60*(ms + 60*hs)) + *ts_start/10;
            *ts_end   = 100*(se + 60*(me + 60*he)) + *ts_end  /10;
            return buf;
        }
    }
    return NULL;
}
| 217 | |
| 218 | static int srt_decode_frame(AVCodecContext *avctx, |
| 219 | void *data, int *got_sub_ptr, AVPacket *avpkt) |
| 220 | { |
| 221 | AVSubtitle *sub = data; |
| 222 | int ts_start, ts_end, x1 = -1, y1 = -1, x2 = -1, y2 = -1; |
| 223 | char buffer[2048]; |
| 224 | const char *ptr = avpkt->data; |
| 225 | const char *end = avpkt->data + avpkt->size; |
| 226 | int size; |
| 227 | const uint8_t *p = av_packet_get_side_data(avpkt, AV_PKT_DATA_SUBTITLE_POSITION, &size); |
| 228 | |
| 229 | if (p && size == 16) { |
| 230 | x1 = AV_RL32(p ); |
| 231 | y1 = AV_RL32(p + 4); |
| 232 | x2 = AV_RL32(p + 8); |
| 233 | y2 = AV_RL32(p + 12); |
| 234 | } |
| 235 | |
| 236 | if (avpkt->size <= 0) |
| 237 | return avpkt->size; |
| 238 | |
| 239 | while (ptr < end && *ptr) { |
| 240 | if (avctx->codec->id == AV_CODEC_ID_SRT) { |
| 241 | ptr = read_ts(ptr, &ts_start, &ts_end, &x1, &y1, &x2, &y2); |
| 242 | if (!ptr) |
| 243 | break; |
| 244 | } else { |
| 245 | // Do final divide-by-10 outside rescale to force rounding down. |
| 246 | ts_start = av_rescale_q(avpkt->pts, |
| 247 | avctx->time_base, |
| 248 | (AVRational){1,100}); |
| 249 | ts_end = av_rescale_q(avpkt->pts + avpkt->duration, |
| 250 | avctx->time_base, |
| 251 | (AVRational){1,100}); |
| 252 | } |
| 253 | ptr = srt_to_ass(avctx, buffer, buffer+sizeof(buffer), ptr, |
| 254 | x1, y1, x2, y2); |
| 255 | ff_ass_add_rect(sub, buffer, ts_start, ts_end-ts_start, 0); |
| 256 | } |
| 257 | |
| 258 | *got_sub_ptr = sub->num_rects > 0; |
| 259 | return avpkt->size; |
| 260 | } |
| 261 | |
#if CONFIG_SRT_DECODER
/* Deprecated decoder: the packet text still contains its timing lines,
   which srt_decode_frame() parses for this codec id. */
AVCodec ff_srt_decoder = {
    .name      = "srt",
    .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
    .id        = AV_CODEC_ID_SRT,
    .type      = AVMEDIA_TYPE_SUBTITLE,
    .decode    = srt_decode_frame,
    .init      = ff_ass_subtitle_header_default,
};
#endif
| 273 | |
#if CONFIG_SUBRIP_DECODER
/* SubRip decoder for packets without timing lines: srt_decode_frame()
   takes the timing from the packet pts and duration for this codec id. */
AVCodec ff_subrip_decoder = {
    .name      = "subrip",
    .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
    .id        = AV_CODEC_ID_SUBRIP,
    .type      = AVMEDIA_TYPE_SUBTITLE,
    .decode    = srt_decode_frame,
    .init      = ff_ass_subtitle_header_default,
};
#endif