Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2012 Andrew D'Addesio | |
3 | * Copyright (c) 2013-2014 Mozilla Corporation | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * Opus decoder/parser shared code | |
25 | */ | |
26 | ||
27 | #include <stdint.h> | |
28 | ||
29 | #include "libavutil/error.h" | |
30 | ||
31 | #include "opus.h" | |
32 | #include "vorbis.h" | |
33 | ||
/**
 * Frame duration for each of the 32 possible TOC configuration values.
 *
 * Indexed by the 5-bit config field of the TOC byte. The grouping below
 * follows the config -> mode mapping used by ff_opus_parse_packet():
 * configs 0-11 are SILK, 12-15 are hybrid and 16-31 are CELT.
 * NOTE(review): values are presumably in samples at 48 kHz - confirm against
 * the definition of MAX_PACKET_DUR.
 */
static const uint16_t opus_frame_duration[32] = {
    /* SILK: one row per bandwidth */
    [ 0] = 480, 960, 1920, 2880,
    [ 4] = 480, 960, 1920, 2880,
    [ 8] = 480, 960, 1920, 2880,
    /* hybrid: one row per bandwidth */
    [12] = 480, 960,
    [14] = 480, 960,
    /* CELT: one row per bandwidth */
    [16] = 120, 240, 480, 960,
    [20] = 120, 240, 480, 960,
    [24] = 120, 240, 480, 960,
    [28] = 120, 240, 480, 960,
};
45 | ||
46 | /** | |
47 | * Read a 1- or 2-byte frame length | |
48 | */ | |
49 | static inline int xiph_lacing_16bit(const uint8_t **ptr, const uint8_t *end) | |
50 | { | |
51 | int val; | |
52 | ||
53 | if (*ptr >= end) | |
54 | return AVERROR_INVALIDDATA; | |
55 | val = *(*ptr)++; | |
56 | if (val >= 252) { | |
57 | if (*ptr >= end) | |
58 | return AVERROR_INVALIDDATA; | |
59 | val += 4 * *(*ptr)++; | |
60 | } | |
61 | return val; | |
62 | } | |
63 | ||
64 | /** | |
65 | * Read a multi-byte length (used for code 3 packet padding size) | |
66 | */ | |
67 | static inline int xiph_lacing_full(const uint8_t **ptr, const uint8_t *end) | |
68 | { | |
69 | int val = 0; | |
70 | int next; | |
71 | ||
72 | while (1) { | |
73 | if (*ptr >= end || val > INT_MAX - 254) | |
74 | return AVERROR_INVALIDDATA; | |
75 | next = *(*ptr)++; | |
76 | val += next; | |
77 | if (next < 255) | |
78 | break; | |
79 | else | |
80 | val--; | |
81 | } | |
82 | return val; | |
83 | } | |
84 | ||
85 | /** | |
86 | * Parse Opus packet info from raw packet data | |
87 | */ | |
88 | int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size, | |
89 | int self_delimiting) | |
90 | { | |
91 | const uint8_t *ptr = buf; | |
92 | const uint8_t *end = buf + buf_size; | |
93 | int padding = 0; | |
94 | int frame_bytes, i; | |
95 | ||
96 | if (buf_size < 1) | |
97 | goto fail; | |
98 | ||
99 | /* TOC byte */ | |
100 | i = *ptr++; | |
101 | pkt->code = (i ) & 0x3; | |
102 | pkt->stereo = (i >> 2) & 0x1; | |
103 | pkt->config = (i >> 3) & 0x1F; | |
104 | ||
105 | /* code 2 and code 3 packets have at least 1 byte after the TOC */ | |
106 | if (pkt->code >= 2 && buf_size < 2) | |
107 | goto fail; | |
108 | ||
109 | switch (pkt->code) { | |
110 | case 0: | |
111 | /* 1 frame */ | |
112 | pkt->frame_count = 1; | |
113 | pkt->vbr = 0; | |
114 | ||
115 | if (self_delimiting) { | |
116 | int len = xiph_lacing_16bit(&ptr, end); | |
117 | if (len < 0 || len > end - ptr) | |
118 | goto fail; | |
119 | end = ptr + len; | |
120 | buf_size = end - buf; | |
121 | } | |
122 | ||
123 | frame_bytes = end - ptr; | |
124 | if (frame_bytes > MAX_FRAME_SIZE) | |
125 | goto fail; | |
126 | pkt->frame_offset[0] = ptr - buf; | |
127 | pkt->frame_size[0] = frame_bytes; | |
128 | break; | |
129 | case 1: | |
130 | /* 2 frames, equal size */ | |
131 | pkt->frame_count = 2; | |
132 | pkt->vbr = 0; | |
133 | ||
134 | if (self_delimiting) { | |
135 | int len = xiph_lacing_16bit(&ptr, end); | |
136 | if (len < 0 || 2 * len > end - ptr) | |
137 | goto fail; | |
138 | end = ptr + 2 * len; | |
139 | buf_size = end - buf; | |
140 | } | |
141 | ||
142 | frame_bytes = end - ptr; | |
143 | if (frame_bytes & 1 || frame_bytes >> 1 > MAX_FRAME_SIZE) | |
144 | goto fail; | |
145 | pkt->frame_offset[0] = ptr - buf; | |
146 | pkt->frame_size[0] = frame_bytes >> 1; | |
147 | pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0]; | |
148 | pkt->frame_size[1] = frame_bytes >> 1; | |
149 | break; | |
150 | case 2: | |
151 | /* 2 frames, different sizes */ | |
152 | pkt->frame_count = 2; | |
153 | pkt->vbr = 1; | |
154 | ||
155 | /* read 1st frame size */ | |
156 | frame_bytes = xiph_lacing_16bit(&ptr, end); | |
157 | if (frame_bytes < 0) | |
158 | goto fail; | |
159 | ||
160 | if (self_delimiting) { | |
161 | int len = xiph_lacing_16bit(&ptr, end); | |
162 | if (len < 0 || len + frame_bytes > end - ptr) | |
163 | goto fail; | |
164 | end = ptr + frame_bytes + len; | |
165 | buf_size = end - buf; | |
166 | } | |
167 | ||
168 | pkt->frame_offset[0] = ptr - buf; | |
169 | pkt->frame_size[0] = frame_bytes; | |
170 | ||
171 | /* calculate 2nd frame size */ | |
172 | frame_bytes = end - ptr - pkt->frame_size[0]; | |
173 | if (frame_bytes < 0 || frame_bytes > MAX_FRAME_SIZE) | |
174 | goto fail; | |
175 | pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0]; | |
176 | pkt->frame_size[1] = frame_bytes; | |
177 | break; | |
178 | case 3: | |
179 | /* 1 to 48 frames, can be different sizes */ | |
180 | i = *ptr++; | |
181 | pkt->frame_count = (i ) & 0x3F; | |
182 | padding = (i >> 6) & 0x01; | |
183 | pkt->vbr = (i >> 7) & 0x01; | |
184 | ||
185 | if (pkt->frame_count == 0 || pkt->frame_count > MAX_FRAMES) | |
186 | goto fail; | |
187 | ||
188 | /* read padding size */ | |
189 | if (padding) { | |
190 | padding = xiph_lacing_full(&ptr, end); | |
191 | if (padding < 0) | |
192 | goto fail; | |
193 | } | |
194 | ||
195 | /* read frame sizes */ | |
196 | if (pkt->vbr) { | |
197 | /* for VBR, all frames except the final one have their size coded | |
198 | in the bitstream. the last frame size is implicit. */ | |
199 | int total_bytes = 0; | |
200 | for (i = 0; i < pkt->frame_count - 1; i++) { | |
201 | frame_bytes = xiph_lacing_16bit(&ptr, end); | |
202 | if (frame_bytes < 0) | |
203 | goto fail; | |
204 | pkt->frame_size[i] = frame_bytes; | |
205 | total_bytes += frame_bytes; | |
206 | } | |
207 | ||
208 | if (self_delimiting) { | |
209 | int len = xiph_lacing_16bit(&ptr, end); | |
210 | if (len < 0 || len + total_bytes + padding > end - ptr) | |
211 | goto fail; | |
212 | end = ptr + total_bytes + len + padding; | |
213 | buf_size = end - buf; | |
214 | } | |
215 | ||
216 | frame_bytes = end - ptr - padding; | |
217 | if (total_bytes > frame_bytes) | |
218 | goto fail; | |
219 | pkt->frame_offset[0] = ptr - buf; | |
220 | for (i = 1; i < pkt->frame_count; i++) | |
221 | pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1]; | |
222 | pkt->frame_size[pkt->frame_count-1] = frame_bytes - total_bytes; | |
223 | } else { | |
224 | /* for CBR, the remaining packet bytes are divided evenly between | |
225 | the frames */ | |
226 | if (self_delimiting) { | |
227 | frame_bytes = xiph_lacing_16bit(&ptr, end); | |
228 | if (frame_bytes < 0 || pkt->frame_count * frame_bytes + padding > end - ptr) | |
229 | goto fail; | |
230 | end = ptr + pkt->frame_count * frame_bytes + padding; | |
231 | buf_size = end - buf; | |
232 | } else { | |
233 | frame_bytes = end - ptr - padding; | |
234 | if (frame_bytes % pkt->frame_count || | |
235 | frame_bytes / pkt->frame_count > MAX_FRAME_SIZE) | |
236 | goto fail; | |
237 | frame_bytes /= pkt->frame_count; | |
238 | } | |
239 | ||
240 | pkt->frame_offset[0] = ptr - buf; | |
241 | pkt->frame_size[0] = frame_bytes; | |
242 | for (i = 1; i < pkt->frame_count; i++) { | |
243 | pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1]; | |
244 | pkt->frame_size[i] = frame_bytes; | |
245 | } | |
246 | } | |
247 | } | |
248 | ||
249 | pkt->packet_size = buf_size; | |
250 | pkt->data_size = pkt->packet_size - padding; | |
251 | ||
252 | /* total packet duration cannot be larger than 120ms */ | |
253 | pkt->frame_duration = opus_frame_duration[pkt->config]; | |
254 | if (pkt->frame_duration * pkt->frame_count > MAX_PACKET_DUR) | |
255 | goto fail; | |
256 | ||
257 | /* set mode and bandwidth */ | |
258 | if (pkt->config < 12) { | |
259 | pkt->mode = OPUS_MODE_SILK; | |
260 | pkt->bandwidth = pkt->config >> 2; | |
261 | } else if (pkt->config < 16) { | |
262 | pkt->mode = OPUS_MODE_HYBRID; | |
263 | pkt->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND + (pkt->config >= 14); | |
264 | } else { | |
265 | pkt->mode = OPUS_MODE_CELT; | |
266 | pkt->bandwidth = (pkt->config - 16) >> 2; | |
267 | /* skip mediumband */ | |
268 | if (pkt->bandwidth) | |
269 | pkt->bandwidth++; | |
270 | } | |
271 | ||
272 | return 0; | |
273 | ||
274 | fail: | |
275 | memset(pkt, 0, sizeof(*pkt)); | |
276 | return AVERROR_INVALIDDATA; | |
277 | } | |
278 | ||
279 | static int channel_reorder_vorbis(int nb_channels, int channel_idx) | |
280 | { | |
281 | return ff_vorbis_channel_layout_offsets[nb_channels - 1][channel_idx]; | |
282 | } | |
283 | ||
/**
 * Identity channel reordering, used when no reordering table applies.
 *
 * @param nb_channels  total channel count (unused)
 * @param channel_idx  channel index
 * @return @p channel_idx unchanged
 */
static int channel_reorder_unknown(int nb_channels, int channel_idx)
{
    (void)nb_channels;

    return channel_idx;
}
288 | ||
289 | av_cold int ff_opus_parse_extradata(AVCodecContext *avctx, | |
290 | OpusContext *s) | |
291 | { | |
292 | static const uint8_t default_channel_map[2] = { 0, 1 }; | |
2ba45a60 DM |
293 | |
294 | int (*channel_reorder)(int, int) = channel_reorder_unknown; | |
295 | ||
296 | const uint8_t *extradata, *channel_map; | |
297 | int extradata_size; | |
298 | int version, channels, map_type, streams, stereo_streams, i, j; | |
299 | uint64_t layout; | |
300 | ||
301 | if (!avctx->extradata) { | |
302 | if (avctx->channels > 2) { | |
303 | av_log(avctx, AV_LOG_ERROR, | |
304 | "Multichannel configuration without extradata.\n"); | |
305 | return AVERROR(EINVAL); | |
306 | } | |
f6fa7814 DM |
307 | extradata = opus_default_extradata; |
308 | extradata_size = sizeof(opus_default_extradata); | |
2ba45a60 DM |
309 | } else { |
310 | extradata = avctx->extradata; | |
311 | extradata_size = avctx->extradata_size; | |
312 | } | |
313 | ||
314 | if (extradata_size < 19) { | |
315 | av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n", | |
316 | extradata_size); | |
317 | return AVERROR_INVALIDDATA; | |
318 | } | |
319 | ||
320 | version = extradata[8]; | |
321 | if (version > 15) { | |
322 | avpriv_request_sample(avctx, "Extradata version %d", version); | |
323 | return AVERROR_PATCHWELCOME; | |
324 | } | |
325 | ||
326 | avctx->delay = AV_RL16(extradata + 10); | |
327 | ||
f6fa7814 | 328 | channels = avctx->extradata ? extradata[9] : (avctx->channels == 1) ? 1 : 2; |
2ba45a60 DM |
329 | if (!channels) { |
330 | av_log(avctx, AV_LOG_ERROR, "Zero channel count specified in the extadata\n"); | |
331 | return AVERROR_INVALIDDATA; | |
332 | } | |
333 | ||
334 | s->gain_i = AV_RL16(extradata + 16); | |
335 | if (s->gain_i) | |
336 | s->gain = pow(10, s->gain_i / (20.0 * 256)); | |
337 | ||
338 | map_type = extradata[18]; | |
339 | if (!map_type) { | |
340 | if (channels > 2) { | |
341 | av_log(avctx, AV_LOG_ERROR, | |
342 | "Channel mapping 0 is only specified for up to 2 channels\n"); | |
343 | return AVERROR_INVALIDDATA; | |
344 | } | |
345 | layout = (channels == 1) ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO; | |
346 | streams = 1; | |
347 | stereo_streams = channels - 1; | |
348 | channel_map = default_channel_map; | |
349 | } else if (map_type == 1 || map_type == 255) { | |
350 | if (extradata_size < 21 + channels) { | |
351 | av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n", | |
352 | extradata_size); | |
353 | return AVERROR_INVALIDDATA; | |
354 | } | |
355 | ||
356 | streams = extradata[19]; | |
357 | stereo_streams = extradata[20]; | |
358 | if (!streams || stereo_streams > streams || | |
359 | streams + stereo_streams > 255) { | |
360 | av_log(avctx, AV_LOG_ERROR, | |
361 | "Invalid stream/stereo stream count: %d/%d\n", streams, stereo_streams); | |
362 | return AVERROR_INVALIDDATA; | |
363 | } | |
364 | ||
365 | if (map_type == 1) { | |
366 | if (channels > 8) { | |
367 | av_log(avctx, AV_LOG_ERROR, | |
368 | "Channel mapping 1 is only specified for up to 8 channels\n"); | |
369 | return AVERROR_INVALIDDATA; | |
370 | } | |
371 | layout = ff_vorbis_channel_layouts[channels - 1]; | |
372 | channel_reorder = channel_reorder_vorbis; | |
373 | } else | |
374 | layout = 0; | |
375 | ||
376 | channel_map = extradata + 21; | |
377 | } else { | |
378 | avpriv_request_sample(avctx, "Mapping type %d", map_type); | |
379 | return AVERROR_PATCHWELCOME; | |
380 | } | |
381 | ||
382 | s->channel_maps = av_mallocz_array(channels, sizeof(*s->channel_maps)); | |
383 | if (!s->channel_maps) | |
384 | return AVERROR(ENOMEM); | |
385 | ||
386 | for (i = 0; i < channels; i++) { | |
387 | ChannelMap *map = &s->channel_maps[i]; | |
388 | uint8_t idx = channel_map[channel_reorder(channels, i)]; | |
389 | ||
390 | if (idx == 255) { | |
391 | map->silence = 1; | |
392 | continue; | |
393 | } else if (idx >= streams + stereo_streams) { | |
394 | av_log(avctx, AV_LOG_ERROR, | |
395 | "Invalid channel map for output channel %d: %d\n", i, idx); | |
396 | return AVERROR_INVALIDDATA; | |
397 | } | |
398 | ||
399 | /* check that we din't see this index yet */ | |
400 | map->copy = 0; | |
401 | for (j = 0; j < i; j++) | |
402 | if (channel_map[channel_reorder(channels, j)] == idx) { | |
403 | map->copy = 1; | |
404 | map->copy_idx = j; | |
405 | break; | |
406 | } | |
407 | ||
408 | if (idx < 2 * stereo_streams) { | |
409 | map->stream_idx = idx / 2; | |
410 | map->channel_idx = idx & 1; | |
411 | } else { | |
412 | map->stream_idx = idx - stereo_streams; | |
413 | map->channel_idx = 0; | |
414 | } | |
415 | } | |
416 | ||
417 | avctx->channels = channels; | |
418 | avctx->channel_layout = layout; | |
419 | s->nb_streams = streams; | |
420 | s->nb_stereo_streams = stereo_streams; | |
421 | ||
422 | return 0; | |
423 | } |