/*
 * On2 Audio for Video Codec decoder
 *
 * Copyright (c) 2013 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 22 | |
| 23 | #include "libavutil/channel_layout.h" |
| 24 | #include "libavutil/float_dsp.h" |
| 25 | #include "avcodec.h" |
| 26 | #include "bytestream.h" |
| 27 | #include "fft.h" |
| 28 | #include "get_bits.h" |
| 29 | #include "golomb.h" |
| 30 | #include "internal.h" |
| 31 | #include "unary.h" |
| 32 | |
| 33 | #include "on2avcdata.h" |
| 34 | |
/* Length, in floats, of the per-channel coefficient and delay buffers. */
#define ON2AVC_SUBFRAME_SIZE 1024

/**
 * Window type of a subframe.
 *
 * The numeric values are spelled out because they are fixed
 * (LONG is 0, 8SHORT is 3, the EXT types follow consecutively).
 */
enum WindowTypes {
    WINDOW_TYPE_LONG       = 0,
    WINDOW_TYPE_LONG_STOP  = 1,
    WINDOW_TYPE_LONG_START = 2,
    WINDOW_TYPE_8SHORT     = 3,
    WINDOW_TYPE_EXT4       = 4,
    WINDOW_TYPE_EXT5       = 5,
    WINDOW_TYPE_EXT6       = 6,
    WINDOW_TYPE_EXT7       = 7,
};
| 47 | |
| 48 | typedef struct On2AVCContext { |
| 49 | AVCodecContext *avctx; |
| 50 | AVFloatDSPContext fdsp; |
| 51 | FFTContext mdct, mdct_half, mdct_small; |
| 52 | FFTContext fft128, fft256, fft512, fft1024; |
| 53 | void (*wtf)(struct On2AVCContext *ctx, float *out, float *in, int size); |
| 54 | |
| 55 | int is_av500; |
| 56 | |
| 57 | const On2AVCMode *modes; |
| 58 | int window_type, prev_window_type; |
| 59 | int num_windows, num_bands; |
| 60 | int bits_per_section; |
| 61 | const int *band_start; |
| 62 | |
| 63 | int grouping[8]; |
| 64 | int ms_present; |
| 65 | int ms_info[ON2AVC_MAX_BANDS]; |
| 66 | |
| 67 | int is_long; |
| 68 | |
| 69 | uint8_t band_type[ON2AVC_MAX_BANDS]; |
| 70 | uint8_t band_run_end[ON2AVC_MAX_BANDS]; |
| 71 | int num_sections; |
| 72 | |
| 73 | float band_scales[ON2AVC_MAX_BANDS]; |
| 74 | |
| 75 | VLC scale_diff; |
| 76 | VLC cb_vlc[16]; |
| 77 | |
| 78 | float scale_tab[128]; |
| 79 | |
| 80 | DECLARE_ALIGNED(32, float, coeffs)[2][ON2AVC_SUBFRAME_SIZE]; |
| 81 | DECLARE_ALIGNED(32, float, delay) [2][ON2AVC_SUBFRAME_SIZE]; |
| 82 | |
| 83 | DECLARE_ALIGNED(32, float, temp) [ON2AVC_SUBFRAME_SIZE * 2]; |
| 84 | DECLARE_ALIGNED(32, float, mdct_buf) [ON2AVC_SUBFRAME_SIZE]; |
| 85 | DECLARE_ALIGNED(32, float, long_win) [ON2AVC_SUBFRAME_SIZE]; |
| 86 | DECLARE_ALIGNED(32, float, short_win)[ON2AVC_SUBFRAME_SIZE / 8]; |
| 87 | } On2AVCContext; |
| 88 | |
| 89 | static void on2avc_read_ms_info(On2AVCContext *c, GetBitContext *gb) |
| 90 | { |
| 91 | int w, b, band_off = 0; |
| 92 | |
| 93 | c->ms_present = get_bits1(gb); |
| 94 | if (!c->ms_present) |
| 95 | return; |
| 96 | for (w = 0; w < c->num_windows; w++) { |
| 97 | if (!c->grouping[w]) { |
| 98 | memcpy(c->ms_info + band_off, |
| 99 | c->ms_info + band_off - c->num_bands, |
| 100 | c->num_bands * sizeof(*c->ms_info)); |
| 101 | band_off += c->num_bands; |
| 102 | continue; |
| 103 | } |
| 104 | for (b = 0; b < c->num_bands; b++) |
| 105 | c->ms_info[band_off++] = get_bits1(gb); |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | // do not see Table 17 in ISO/IEC 13818-7 |
| 110 | static int on2avc_decode_band_types(On2AVCContext *c, GetBitContext *gb) |
| 111 | { |
| 112 | int bits_per_sect = c->is_long ? 5 : 3; |
| 113 | int esc_val = (1 << bits_per_sect) - 1; |
| 114 | int num_bands = c->num_bands * c->num_windows; |
| 115 | int band = 0, i, band_type, run_len, run; |
| 116 | |
| 117 | while (band < num_bands) { |
| 118 | band_type = get_bits(gb, 4); |
| 119 | run_len = 1; |
| 120 | do { |
| 121 | run = get_bits(gb, bits_per_sect); |
| 122 | run_len += run; |
| 123 | } while (run == esc_val); |
| 124 | if (band + run_len > num_bands) { |
| 125 | av_log(c->avctx, AV_LOG_ERROR, "Invalid band type run\n"); |
| 126 | return AVERROR_INVALIDDATA; |
| 127 | } |
| 128 | for (i = band; i < band + run_len; i++) { |
| 129 | c->band_type[i] = band_type; |
| 130 | c->band_run_end[i] = band + run_len; |
| 131 | } |
| 132 | band += run_len; |
| 133 | } |
| 134 | |
| 135 | return 0; |
| 136 | } |
| 137 | |
| 138 | // completely not like Table 18 in ISO/IEC 13818-7 |
| 139 | // (no intensity stereo, different coding for the first coefficient) |
| 140 | static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb) |
| 141 | { |
| 142 | int w, w2, b, scale, first = 1; |
| 143 | int band_off = 0; |
| 144 | |
| 145 | for (w = 0; w < c->num_windows; w++) { |
| 146 | if (!c->grouping[w]) { |
| 147 | memcpy(c->band_scales + band_off, |
| 148 | c->band_scales + band_off - c->num_bands, |
| 149 | c->num_bands * sizeof(*c->band_scales)); |
| 150 | band_off += c->num_bands; |
| 151 | continue; |
| 152 | } |
| 153 | for (b = 0; b < c->num_bands; b++) { |
| 154 | if (!c->band_type[band_off]) { |
| 155 | int all_zero = 1; |
| 156 | for (w2 = w + 1; w2 < c->num_windows; w2++) { |
| 157 | if (c->grouping[w2]) |
| 158 | break; |
| 159 | if (c->band_type[w2 * c->num_bands + b]) { |
| 160 | all_zero = 0; |
| 161 | break; |
| 162 | } |
| 163 | } |
| 164 | if (all_zero) { |
| 165 | c->band_scales[band_off++] = 0; |
| 166 | continue; |
| 167 | } |
| 168 | } |
| 169 | if (first) { |
| 170 | scale = get_bits(gb, 7); |
| 171 | first = 0; |
| 172 | } else { |
| 173 | scale += get_vlc2(gb, c->scale_diff.table, 9, 3) - 60; |
| 174 | } |
| 175 | if (scale < 0 || scale > 127) { |
| 176 | av_log(c->avctx, AV_LOG_ERROR, "Invalid scale value %d\n", |
| 177 | scale); |
| 178 | return AVERROR_INVALIDDATA; |
| 179 | } |
| 180 | c->band_scales[band_off++] = c->scale_tab[scale]; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | return 0; |
| 185 | } |
| 186 | |
| 187 | static inline float on2avc_scale(int v, float scale) |
| 188 | { |
| 189 | return v * sqrtf(fabsf(v)) * scale; |
| 190 | } |
| 191 | |
| 192 | // spectral data is coded completely differently - there are no unsigned codebooks |
| 193 | static int on2avc_decode_quads(On2AVCContext *c, GetBitContext *gb, float *dst, |
| 194 | int dst_size, int type, float band_scale) |
| 195 | { |
| 196 | int i, j, val, val1; |
| 197 | |
| 198 | for (i = 0; i < dst_size; i += 4) { |
| 199 | val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3); |
| 200 | |
| 201 | for (j = 0; j < 4; j++) { |
| 202 | val1 = sign_extend((val >> (12 - j * 4)) & 0xF, 4); |
| 203 | *dst++ = on2avc_scale(val1, band_scale); |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | return 0; |
| 208 | } |
| 209 | |
| 210 | static inline int get_egolomb(GetBitContext *gb) |
| 211 | { |
| 212 | int v = 4; |
| 213 | |
| 214 | while (get_bits1(gb)) v++; |
| 215 | |
| 216 | return (1 << v) + get_bits(gb, v); |
| 217 | } |
| 218 | |
| 219 | static int on2avc_decode_pairs(On2AVCContext *c, GetBitContext *gb, float *dst, |
| 220 | int dst_size, int type, float band_scale) |
| 221 | { |
| 222 | int i, val, val1, val2, sign; |
| 223 | |
| 224 | for (i = 0; i < dst_size; i += 2) { |
| 225 | val = get_vlc2(gb, c->cb_vlc[type].table, 9, 3); |
| 226 | |
| 227 | val1 = sign_extend(val >> 8, 8); |
| 228 | val2 = sign_extend(val & 0xFF, 8); |
| 229 | if (type == ON2AVC_ESC_CB) { |
| 230 | if (val1 <= -16 || val1 >= 16) { |
| 231 | sign = 1 - (val1 < 0) * 2; |
| 232 | val1 = sign * get_egolomb(gb); |
| 233 | } |
| 234 | if (val2 <= -16 || val2 >= 16) { |
| 235 | sign = 1 - (val2 < 0) * 2; |
| 236 | val2 = sign * get_egolomb(gb); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | *dst++ = on2avc_scale(val1, band_scale); |
| 241 | *dst++ = on2avc_scale(val2, band_scale); |
| 242 | } |
| 243 | |
| 244 | return 0; |
| 245 | } |
| 246 | |
| 247 | static int on2avc_read_channel_data(On2AVCContext *c, GetBitContext *gb, int ch) |
| 248 | { |
| 249 | int ret; |
| 250 | int w, b, band_idx; |
| 251 | float *coeff_ptr; |
| 252 | |
| 253 | if ((ret = on2avc_decode_band_types(c, gb)) < 0) |
| 254 | return ret; |
| 255 | if ((ret = on2avc_decode_band_scales(c, gb)) < 0) |
| 256 | return ret; |
| 257 | |
| 258 | coeff_ptr = c->coeffs[ch]; |
| 259 | band_idx = 0; |
| 260 | memset(coeff_ptr, 0, ON2AVC_SUBFRAME_SIZE * sizeof(*coeff_ptr)); |
| 261 | for (w = 0; w < c->num_windows; w++) { |
| 262 | for (b = 0; b < c->num_bands; b++) { |
| 263 | int band_size = c->band_start[b + 1] - c->band_start[b]; |
| 264 | int band_type = c->band_type[band_idx + b]; |
| 265 | |
| 266 | if (!band_type) { |
| 267 | coeff_ptr += band_size; |
| 268 | continue; |
| 269 | } |
| 270 | if (band_type < 9) |
| 271 | on2avc_decode_quads(c, gb, coeff_ptr, band_size, band_type, |
| 272 | c->band_scales[band_idx + b]); |
| 273 | else |
| 274 | on2avc_decode_pairs(c, gb, coeff_ptr, band_size, band_type, |
| 275 | c->band_scales[band_idx + b]); |
| 276 | coeff_ptr += band_size; |
| 277 | } |
| 278 | band_idx += c->num_bands; |
| 279 | } |
| 280 | |
| 281 | return 0; |
| 282 | } |
| 283 | |
| 284 | static int on2avc_apply_ms(On2AVCContext *c) |
| 285 | { |
| 286 | int w, b, i; |
| 287 | int band_off = 0; |
| 288 | float *ch0 = c->coeffs[0]; |
| 289 | float *ch1 = c->coeffs[1]; |
| 290 | |
| 291 | for (w = 0; w < c->num_windows; w++) { |
| 292 | for (b = 0; b < c->num_bands; b++) { |
| 293 | if (c->ms_info[band_off + b]) { |
| 294 | for (i = c->band_start[b]; i < c->band_start[b + 1]; i++) { |
| 295 | float l = *ch0, r = *ch1; |
| 296 | *ch0++ = l + r; |
| 297 | *ch1++ = l - r; |
| 298 | } |
| 299 | } else { |
| 300 | ch0 += c->band_start[b + 1] - c->band_start[b]; |
| 301 | ch1 += c->band_start[b + 1] - c->band_start[b]; |
| 302 | } |
| 303 | } |
| 304 | band_off += c->num_bands; |
| 305 | } |
| 306 | return 0; |
| 307 | } |
| 308 | |
/**
 * Clear the first order0 and the last order1 elements of a float array.
 *
 * @param src    array to modify
 * @param len    number of elements in src
 * @param order0 number of leading elements to clear
 * @param order1 number of trailing elements to clear
 */
static void zero_head_and_tail(float *src, int len, int order0, int order1)
{
    float *const tail = src + len - order1;

    memset(src,  0, order0 * sizeof(float));
    memset(tail, 0, order1 * sizeof(float));
}
| 314 | |
/**
 * Add to out[0..count) the products of in[0..taps) with a table holding
 * `count` entries per input sample (entry for sample r, output c is
 * tab[r * count + c]). Each output's sum is formed in double precision
 * before it is added.
 */
static void pretwiddle_accumulate(const float *in, float *out,
                                  const double *tab, int count, int taps)
{
    int col, row;

    for (col = 0; col < count; col++) {
        double acc = 0;

        for (row = 0; row < taps; row++)
            acc += in[row] * tab[row * count + col];
        out[col] += acc;
    }
}

/**
 * Add the contributions of the leading and trailing input samples to the
 * first and last tab_step output samples.
 *
 * @param src      input samples
 * @param dst      output samples, accumulated into
 * @param dst_len  number of output samples
 * @param tab_step number of output samples touched at each end; also the
 *                 row length of the tables
 * @param step     output advance per input sample
 * @param order0   number of leading input samples; they use tabs[0]
 * @param order1   number of trailing input samples; they use tabs[order0]
 * @param tabs     table list
 */
static void pretwiddle(float *src, float *dst, int dst_len, int tab_step,
                       int step, int order0, int order1, const double * const *tabs)
{
    const int tail_pos = dst_len - tab_step;

    /* head: first order0 inputs into the first tab_step outputs */
    pretwiddle_accumulate(src, dst, tabs[0], tab_step, order0);

    /* tail: inputs following the regular ones into the last tab_step outputs */
    pretwiddle_accumulate(src + tail_pos / step + 1 + order0, dst + tail_pos,
                          tabs[order0], tab_step, order1);
}
| 341 | |
/**
 * Accumulate a filtered version of src1 into src2.
 *
 * pretwiddle() handles the order0 leading and order1 trailing inputs.
 * After that, each of the remaining inputs adds tab_len weighted copies of
 * itself to src2, walking downwards from a position that is derived from
 * the previous position plus step and folds back to the end of src2 when it
 * would run below index 0.
 *
 * @param src1     input samples
 * @param src2     output samples, accumulated into
 * @param src2_len number of output samples
 * @param tab      weights for the regular inputs
 * @param tab_len  number of weights in tab
 * @param step     position advance per input sample
 * @param order0   number of leading inputs handled by pretwiddle()
 * @param order1   number of trailing inputs handled by pretwiddle()
 * @param tabs     tables forwarded to pretwiddle()
 */
static void twiddle(float *src1, float *src2, int src2_len,
                    const double *tab, int tab_len, int step,
                    int order0, int order1, const double * const *tabs)
{
    const int count = (src2_len - tab_len) / step + 1;
    int fold = tab_len - 1;
    int n, k;

    pretwiddle(src1, src2, src2_len, tab_len, step, order0, order1, tabs);

    for (n = 0; n < count; n++) {
        const float sample = src1[order0 + n];
        const int   pos    = (src2_len - 1) & fold;

        if (pos >= tab_len) {
            /* the whole weight run fits below pos */
            for (k = 0; k < tab_len; k++)
                src2[pos - k] += sample * tab[k];
        } else {
            /* weights 0..pos go down to index 0 ... */
            for (k = 0; k <= pos; k++)
                src2[pos - k] += sample * tab[k];
            /* ... the remaining ones continue from the end of src2 */
            for (k = pos + 1; k < tab_len; k++)
                src2[src2_len + pos - k] += sample * tab[k];
        }
        fold = pos + step;
    }
}
| 371 | |
/*
 * Complex multiply helpers used by combine_fft().
 *
 * s and t are float arrays holding interleaved (re, im) values; is and it
 * are the indices of the real parts.
 *
 * CMUL1_R/CMUL1_I give the real/imaginary part of s * t,
 * CMUL2_R/CMUL2_I the real/imaginary part of s * conj(t).
 *
 * NOTE: the expansions of these four helpers are deliberately NOT wrapped
 * in parentheses. CMUL1/CMUL2 add four of them in a row and the resulting
 * left-to-right floating-point evaluation order is part of the existing
 * numerical behaviour. Do not use them inside larger expressions.
 */
#define CMUL1_R(s, t, is, it) \
    (s)[(is) + 0] * (t)[(it) + 0] - (s)[(is) + 1] * (t)[(it) + 1]
#define CMUL1_I(s, t, is, it) \
    (s)[(is) + 0] * (t)[(it) + 1] + (s)[(is) + 1] * (t)[(it) + 0]
#define CMUL2_R(s, t, is, it) \
    (s)[(is) + 0] * (t)[(it) + 0] + (s)[(is) + 1] * (t)[(it) + 1]
#define CMUL2_I(s, t, is, it) \
    (s)[(is) + 0] * (t)[(it) + 1] - (s)[(is) + 1] * (t)[(it) + 0]

/*
 * dst[id], dst[id + 1] = sum over the four inputs of the real sample
 * sN[is] times the complex value at tN[it], tN[it + 1].
 * Wrapped in do { } while (0) so that it acts as a single statement.
 */
#define CMUL0(dst, id, s0, s1, s2, s3, t0, t1, t2, t3, is, it)               \
    do {                                                                     \
        (dst)[(id)]     = (s0)[(is)] * (t0)[(it)]                            \
                        + (s1)[(is)] * (t1)[(it)]                            \
                        + (s2)[(is)] * (t2)[(it)]                            \
                        + (s3)[(is)] * (t3)[(it)];                           \
        (dst)[(id) + 1] = (s0)[(is)] * (t0)[(it) + 1]                        \
                        + (s1)[(is)] * (t1)[(it) + 1]                        \
                        + (s2)[(is)] * (t2)[(it) + 1]                        \
                        + (s3)[(is)] * (t3)[(it) + 1];                       \
    } while (0)

/*
 * Store the sum of the four complex products sN * tN through dst and
 * advance dst by two floats. dst must be a modifiable pointer.
 */
#define CMUL1(dst, s0, s1, s2, s3, t0, t1, t2, t3, is, it)                   \
    do {                                                                     \
        *(dst)++ = CMUL1_R(s0, t0, is, it)                                   \
                 + CMUL1_R(s1, t1, is, it)                                   \
                 + CMUL1_R(s2, t2, is, it)                                   \
                 + CMUL1_R(s3, t3, is, it);                                  \
        *(dst)++ = CMUL1_I(s0, t0, is, it)                                   \
                 + CMUL1_I(s1, t1, is, it)                                   \
                 + CMUL1_I(s2, t2, is, it)                                   \
                 + CMUL1_I(s3, t3, is, it);                                  \
    } while (0)

/* Same as CMUL1 but multiplies by the conjugate of tN. */
#define CMUL2(dst, s0, s1, s2, s3, t0, t1, t2, t3, is, it)                   \
    do {                                                                     \
        *(dst)++ = CMUL2_R(s0, t0, is, it)                                   \
                 + CMUL2_R(s1, t1, is, it)                                   \
                 + CMUL2_R(s2, t2, is, it)                                   \
                 + CMUL2_R(s3, t3, is, it);                                  \
        *(dst)++ = CMUL2_I(s0, t0, is, it)                                   \
                 + CMUL2_I(s1, t1, is, it)                                   \
                 + CMUL2_I(s2, t2, is, it)                                   \
                 + CMUL2_I(s3, t3, is, it);                                  \
    } while (0)
| 406 | |
/**
 * Combine four interleaved-complex spectra into one, weighting each with
 * its own coefficient table.
 *
 * Every table is read at two places: from its start and from an offset of
 * (len / 2) * step rounded as computed below. The second set produces the
 * outputs located len / 2 floats after those of the first set.
 *
 * @param s0,s1,s2,s3 input spectra
 * @param dst         output buffer
 * @param t0,t1,t2,t3 coefficient tables, one per input
 * @param len         transform length passed by the caller (512 or 1024
 *                    in this file)
 * @param step        stride through the tables, in complex entries
 */
static void combine_fft(float *s0, float *s1, float *s2, float *s3, float *dst,
                        const float *t0, const float *t1,
                        const float *t2, const float *t3, int len, int step)
{
    const int half_len    = len >> 1;
    const int quarter_len = len >> 2;
    const int mid_off     = 2 * step * (quarter_len >> 1);
    const float *u0, *u1, *u2, *u3;
    float *lo, *hi;
    int upper, rem;
    int n, si, ti;

    /* offset of the second table set: half_len scaled up by step */
    upper = half_len;
    for (rem = step; rem > 1; rem >>= 1)
        upper <<= 1;

    u0 = t0 + upper;
    u1 = t1 + upper;
    u2 = t2 + upper;
    u3 = t3 + upper;

    CMUL0(dst, 0, s0, s1, s2, s3, t0, t1, t2, t3, 0, 0);

    /* first run: plain complex products, input index rising */
    si = 2;
    ti = 2 * step;
    lo = dst + 2;
    hi = dst + 2 + half_len;
    for (n = 0; n < (quarter_len - 1) >> 1; n++) {
        CMUL1(lo, s0, s1, s2, s3, t0, t1, t2, t3, si, ti);
        CMUL1(hi, s0, s1, s2, s3, u0, u1, u2, u3, si, ti);
        si += 2;
        ti += 2 * step;
    }

    CMUL0(dst, quarter_len,            s0, s1, s2, s3, t0, t1, t2, t3, 1, mid_off);
    CMUL0(dst, quarter_len + half_len, s0, s1, s2, s3, u0, u1, u2, u3, 1, mid_off);

    /* second run: conjugate products, input index falling */
    si = quarter_len;
    ti = mid_off + 2 * step * quarter_len;
    lo = dst + quarter_len + 2;
    hi = dst + quarter_len + 2 + half_len;
    for (n = 0; n < (quarter_len - 2) >> 1; n++) {
        CMUL2(lo, s0, s1, s2, s3, t0, t1, t2, t3, si, ti);
        CMUL2(hi, s0, s1, s2, s3, u0, u1, u2, u3, si, ti);
        si -= 2;
        ti += 2 * step;
    }

    CMUL0(dst, half_len + 4, s0, s1, s2, s3, t0, t1, t2, t3, 0, ti);
}
| 455 | |
| 456 | static void wtf_end_512(On2AVCContext *c, float *out, float *src, |
| 457 | float *tmp0, float *tmp1) |
| 458 | { |
| 459 | memcpy(src, tmp0, 384 * sizeof(*tmp0)); |
| 460 | memcpy(tmp0 + 384, src + 384, 128 * sizeof(*tmp0)); |
| 461 | |
| 462 | zero_head_and_tail(src, 128, 16, 4); |
| 463 | zero_head_and_tail(src + 128, 128, 16, 4); |
| 464 | zero_head_and_tail(src + 256, 128, 13, 7); |
| 465 | zero_head_and_tail(src + 384, 128, 15, 5); |
| 466 | |
| 467 | c->fft128.fft_permute(&c->fft128, (FFTComplex*)src); |
| 468 | c->fft128.fft_permute(&c->fft128, (FFTComplex*)(src + 128)); |
| 469 | c->fft128.fft_permute(&c->fft128, (FFTComplex*)(src + 256)); |
| 470 | c->fft128.fft_permute(&c->fft128, (FFTComplex*)(src + 384)); |
| 471 | c->fft128.fft_calc(&c->fft128, (FFTComplex*)src); |
| 472 | c->fft128.fft_calc(&c->fft128, (FFTComplex*)(src + 128)); |
| 473 | c->fft128.fft_calc(&c->fft128, (FFTComplex*)(src + 256)); |
| 474 | c->fft128.fft_calc(&c->fft128, (FFTComplex*)(src + 384)); |
| 475 | combine_fft(src, src + 128, src + 256, src + 384, tmp1, |
| 476 | ff_on2avc_ctab_1, ff_on2avc_ctab_2, |
| 477 | ff_on2avc_ctab_3, ff_on2avc_ctab_4, 512, 2); |
| 478 | c->fft512.fft_permute(&c->fft512, (FFTComplex*)tmp1); |
| 479 | c->fft512.fft_calc(&c->fft512, (FFTComplex*)tmp1); |
| 480 | |
| 481 | pretwiddle(&tmp0[ 0], tmp1, 512, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 482 | pretwiddle(&tmp0[128], tmp1, 512, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 483 | pretwiddle(&tmp0[256], tmp1, 512, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 484 | pretwiddle(&tmp0[384], tmp1, 512, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 485 | |
| 486 | memcpy(src, tmp1, 512 * sizeof(float)); |
| 487 | } |
| 488 | |
| 489 | static void wtf_end_1024(On2AVCContext *c, float *out, float *src, |
| 490 | float *tmp0, float *tmp1) |
| 491 | { |
| 492 | memcpy(src, tmp0, 768 * sizeof(*tmp0)); |
| 493 | memcpy(tmp0 + 768, src + 768, 256 * sizeof(*tmp0)); |
| 494 | |
| 495 | zero_head_and_tail(src, 256, 16, 4); |
| 496 | zero_head_and_tail(src + 256, 256, 16, 4); |
| 497 | zero_head_and_tail(src + 512, 256, 13, 7); |
| 498 | zero_head_and_tail(src + 768, 256, 15, 5); |
| 499 | |
| 500 | c->fft256.fft_permute(&c->fft256, (FFTComplex*)src); |
| 501 | c->fft256.fft_permute(&c->fft256, (FFTComplex*)(src + 256)); |
| 502 | c->fft256.fft_permute(&c->fft256, (FFTComplex*)(src + 512)); |
| 503 | c->fft256.fft_permute(&c->fft256, (FFTComplex*)(src + 768)); |
| 504 | c->fft256.fft_calc(&c->fft256, (FFTComplex*)src); |
| 505 | c->fft256.fft_calc(&c->fft256, (FFTComplex*)(src + 256)); |
| 506 | c->fft256.fft_calc(&c->fft256, (FFTComplex*)(src + 512)); |
| 507 | c->fft256.fft_calc(&c->fft256, (FFTComplex*)(src + 768)); |
| 508 | combine_fft(src, src + 256, src + 512, src + 768, tmp1, |
| 509 | ff_on2avc_ctab_1, ff_on2avc_ctab_2, |
| 510 | ff_on2avc_ctab_3, ff_on2avc_ctab_4, 1024, 1); |
| 511 | c->fft1024.fft_permute(&c->fft1024, (FFTComplex*)tmp1); |
| 512 | c->fft1024.fft_calc(&c->fft1024, (FFTComplex*)tmp1); |
| 513 | |
| 514 | pretwiddle(&tmp0[ 0], tmp1, 1024, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 515 | pretwiddle(&tmp0[256], tmp1, 1024, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 516 | pretwiddle(&tmp0[512], tmp1, 1024, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 517 | pretwiddle(&tmp0[768], tmp1, 1024, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 518 | |
| 519 | memcpy(src, tmp1, 1024 * sizeof(float)); |
| 520 | } |
| 521 | |
| 522 | static void wtf_40(On2AVCContext *c, float *out, float *src, int size) |
| 523 | { |
| 524 | float *tmp0 = c->temp, *tmp1 = c->temp + 1024; |
| 525 | |
| 526 | memset(tmp0, 0, sizeof(*tmp0) * 1024); |
| 527 | memset(tmp1, 0, sizeof(*tmp1) * 1024); |
| 528 | |
| 529 | if (size == 512) { |
| 530 | twiddle(src, &tmp0[ 0], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 531 | twiddle(src + 8, &tmp0[ 0], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 532 | twiddle(src + 16, &tmp0[ 16], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 533 | twiddle(src + 24, &tmp0[ 16], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 534 | twiddle(src + 32, &tmp0[ 32], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 535 | twiddle(src + 40, &tmp0[ 32], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 536 | twiddle(src + 48, &tmp0[ 48], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 537 | twiddle(src + 56, &tmp0[ 48], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 538 | twiddle(&tmp0[ 0], &tmp1[ 0], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 539 | twiddle(&tmp0[16], &tmp1[ 0], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 540 | twiddle(&tmp0[32], &tmp1[ 32], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 541 | twiddle(&tmp0[48], &tmp1[ 32], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 542 | twiddle(src + 64, &tmp1[ 64], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 543 | twiddle(src + 80, &tmp1[ 64], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 544 | twiddle(src + 96, &tmp1[ 96], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 545 | twiddle(src + 112, &tmp1[ 96], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 546 | twiddle(src + 128, &tmp1[128], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 547 | twiddle(src + 144, &tmp1[128], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 548 | twiddle(src + 160, &tmp1[160], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 549 | twiddle(src + 176, &tmp1[160], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 550 | |
| 551 | memset(tmp0, 0, 64 * sizeof(*tmp0)); |
| 552 | |
| 553 | twiddle(&tmp1[ 0], &tmp0[ 0], 128, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 554 | twiddle(&tmp1[ 32], &tmp0[ 0], 128, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 555 | twiddle(&tmp1[ 64], &tmp0[ 0], 128, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 556 | twiddle(&tmp1[ 96], &tmp0[ 0], 128, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 557 | twiddle(&tmp1[128], &tmp0[128], 128, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 558 | twiddle(&tmp1[160], &tmp0[128], 128, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 559 | twiddle(src + 192, &tmp0[128], 128, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 560 | twiddle(src + 224, &tmp0[128], 128, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 561 | twiddle(src + 256, &tmp0[256], 128, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 562 | twiddle(src + 288, &tmp0[256], 128, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 563 | twiddle(src + 320, &tmp0[256], 128, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 564 | twiddle(src + 352, &tmp0[256], 128, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 565 | |
| 566 | wtf_end_512(c, out, src, tmp0, tmp1); |
| 567 | } else { |
| 568 | twiddle(src, &tmp0[ 0], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 569 | twiddle(src + 16, &tmp0[ 0], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 570 | twiddle(src + 32, &tmp0[ 32], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 571 | twiddle(src + 48, &tmp0[ 32], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 572 | twiddle(src + 64, &tmp0[ 64], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 573 | twiddle(src + 80, &tmp0[ 64], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 574 | twiddle(src + 96, &tmp0[ 96], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 575 | twiddle(src + 112, &tmp0[ 96], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 576 | twiddle(&tmp0[ 0], &tmp1[ 0], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 577 | twiddle(&tmp0[32], &tmp1[ 0], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 578 | twiddle(&tmp0[64], &tmp1[ 64], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 579 | twiddle(&tmp0[96], &tmp1[ 64], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 580 | twiddle(src + 128, &tmp1[128], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 581 | twiddle(src + 160, &tmp1[128], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 582 | twiddle(src + 192, &tmp1[192], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 583 | twiddle(src + 224, &tmp1[192], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 584 | twiddle(src + 256, &tmp1[256], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 585 | twiddle(src + 288, &tmp1[256], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 586 | twiddle(src + 320, &tmp1[320], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 587 | twiddle(src + 352, &tmp1[320], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 588 | |
| 589 | memset(tmp0, 0, 128 * sizeof(*tmp0)); |
| 590 | |
| 591 | twiddle(&tmp1[ 0], &tmp0[ 0], 256, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 592 | twiddle(&tmp1[ 64], &tmp0[ 0], 256, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 593 | twiddle(&tmp1[128], &tmp0[ 0], 256, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 594 | twiddle(&tmp1[192], &tmp0[ 0], 256, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 595 | twiddle(&tmp1[256], &tmp0[256], 256, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 596 | twiddle(&tmp1[320], &tmp0[256], 256, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 597 | twiddle(src + 384, &tmp0[256], 256, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 598 | twiddle(src + 448, &tmp0[256], 256, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 599 | twiddle(src + 512, &tmp0[512], 256, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 600 | twiddle(src + 576, &tmp0[512], 256, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 601 | twiddle(src + 640, &tmp0[512], 256, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 602 | twiddle(src + 704, &tmp0[512], 256, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 603 | |
| 604 | wtf_end_1024(c, out, src, tmp0, tmp1); |
| 605 | } |
| 606 | } |
| 607 | |
| 608 | static void wtf_44(On2AVCContext *c, float *out, float *src, int size) |
| 609 | { |
| 610 | float *tmp0 = c->temp, *tmp1 = c->temp + 1024; |
| 611 | |
| 612 | memset(tmp0, 0, sizeof(*tmp0) * 1024); |
| 613 | memset(tmp1, 0, sizeof(*tmp1) * 1024); |
| 614 | |
| 615 | if (size == 512) { |
| 616 | twiddle(src, &tmp0[ 0], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 617 | twiddle(src + 8, &tmp0[ 0], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 618 | twiddle(src + 16, &tmp0[16], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 619 | twiddle(src + 24, &tmp0[16], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 620 | twiddle(src + 32, &tmp0[32], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 621 | twiddle(src + 40, &tmp0[32], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 622 | twiddle(src + 48, &tmp0[48], 16, ff_on2avc_tab_10_2, 10, 2, 3, 1, ff_on2avc_tabs_4_10_2); |
| 623 | twiddle(src + 56, &tmp0[48], 16, ff_on2avc_tab_10_1, 10, 2, 1, 3, ff_on2avc_tabs_4_10_1); |
| 624 | twiddle(&tmp0[ 0], &tmp1[ 0], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 625 | twiddle(&tmp0[16], &tmp1[ 0], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 626 | twiddle(&tmp0[32], &tmp1[32], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 627 | twiddle(&tmp0[48], &tmp1[32], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 628 | twiddle(src + 64, &tmp1[64], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 629 | twiddle(src + 80, &tmp1[64], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 630 | twiddle(src + 96, &tmp1[96], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 631 | twiddle(src + 112, &tmp1[96], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 632 | |
| 633 | memset(tmp0, 0, 64 * sizeof(*tmp0)); |
| 634 | |
| 635 | twiddle(&tmp1[ 0], &tmp0[ 0], 128, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 636 | twiddle(&tmp1[32], &tmp0[ 0], 128, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 637 | twiddle(&tmp1[64], &tmp0[ 0], 128, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 638 | twiddle(&tmp1[96], &tmp0[ 0], 128, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 639 | twiddle(src + 128, &tmp0[128], 128, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 640 | twiddle(src + 160, &tmp0[128], 128, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 641 | twiddle(src + 192, &tmp0[128], 128, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 642 | twiddle(src + 224, &tmp0[128], 128, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 643 | twiddle(src + 256, &tmp0[256], 128, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 644 | twiddle(src + 320, &tmp0[256], 128, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 645 | |
| 646 | wtf_end_512(c, out, src, tmp0, tmp1); |
| 647 | } else { |
| 648 | twiddle(src, &tmp0[ 0], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 649 | twiddle(src + 16, &tmp0[ 0], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 650 | twiddle(src + 32, &tmp0[ 32], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 651 | twiddle(src + 48, &tmp0[ 32], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 652 | twiddle(src + 64, &tmp0[ 64], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 653 | twiddle(src + 80, &tmp0[ 64], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 654 | twiddle(src + 96, &tmp0[ 96], 32, ff_on2avc_tab_20_2, 20, 2, 4, 5, ff_on2avc_tabs_9_20_2); |
| 655 | twiddle(src + 112, &tmp0[ 96], 32, ff_on2avc_tab_20_1, 20, 2, 5, 4, ff_on2avc_tabs_9_20_1); |
| 656 | twiddle(&tmp0[ 0], &tmp1[ 0], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 657 | twiddle(&tmp0[32], &tmp1[ 0], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 658 | twiddle(&tmp0[64], &tmp1[ 64], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 659 | twiddle(&tmp0[96], &tmp1[ 64], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 660 | twiddle(src + 128, &tmp1[128], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 661 | twiddle(src + 160, &tmp1[128], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 662 | twiddle(src + 192, &tmp1[192], 64, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 663 | twiddle(src + 224, &tmp1[192], 64, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 664 | |
| 665 | memset(tmp0, 0, 128 * sizeof(*tmp0)); |
| 666 | |
| 667 | twiddle(&tmp1[ 0], &tmp0[ 0], 256, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 668 | twiddle(&tmp1[ 64], &tmp0[ 0], 256, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 669 | twiddle(&tmp1[128], &tmp0[ 0], 256, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 670 | twiddle(&tmp1[192], &tmp0[ 0], 256, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 671 | twiddle(src + 256, &tmp0[256], 256, ff_on2avc_tab_84_4, 84, 4, 15, 5, ff_on2avc_tabs_20_84_4); |
| 672 | twiddle(src + 320, &tmp0[256], 256, ff_on2avc_tab_84_3, 84, 4, 13, 7, ff_on2avc_tabs_20_84_3); |
| 673 | twiddle(src + 384, &tmp0[256], 256, ff_on2avc_tab_84_2, 84, 4, 16, 4, ff_on2avc_tabs_20_84_2); |
| 674 | twiddle(src + 448, &tmp0[256], 256, ff_on2avc_tab_84_1, 84, 4, 16, 4, ff_on2avc_tabs_20_84_1); |
| 675 | twiddle(src + 512, &tmp0[512], 256, ff_on2avc_tab_40_1, 40, 2, 11, 8, ff_on2avc_tabs_19_40_1); |
| 676 | twiddle(src + 640, &tmp0[512], 256, ff_on2avc_tab_40_2, 40, 2, 8, 11, ff_on2avc_tabs_19_40_2); |
| 677 | |
| 678 | wtf_end_1024(c, out, src, tmp0, tmp1); |
| 679 | } |
| 680 | } |
| 681 | |
| 682 | static int on2avc_reconstruct_stereo(On2AVCContext *c, AVFrame *dst, int offset) |
| 683 | { |
| 684 | int ch, i; |
| 685 | |
| 686 | for (ch = 0; ch < 2; ch++) { |
| 687 | float *out = (float*)dst->extended_data[ch] + offset; |
| 688 | float *in = c->coeffs[ch]; |
| 689 | float *saved = c->delay[ch]; |
| 690 | float *buf = c->mdct_buf; |
| 691 | float *wout = out + 448; |
| 692 | |
| 693 | switch (c->window_type) { |
| 694 | case WINDOW_TYPE_EXT7: |
| 695 | c->mdct.imdct_half(&c->mdct, buf, in); |
| 696 | break; |
| 697 | case WINDOW_TYPE_EXT4: |
| 698 | c->wtf(c, buf, in, 1024); |
| 699 | break; |
| 700 | case WINDOW_TYPE_EXT5: |
| 701 | c->wtf(c, buf, in, 512); |
| 702 | c->mdct.imdct_half(&c->mdct_half, buf + 512, in + 512); |
| 703 | for (i = 0; i < 256; i++) { |
| 704 | FFSWAP(float, buf[i + 512], buf[1023 - i]); |
| 705 | } |
| 706 | break; |
| 707 | case WINDOW_TYPE_EXT6: |
| 708 | c->mdct.imdct_half(&c->mdct_half, buf, in); |
| 709 | for (i = 0; i < 256; i++) { |
| 710 | FFSWAP(float, buf[i], buf[511 - i]); |
| 711 | } |
| 712 | c->wtf(c, buf + 512, in + 512, 512); |
| 713 | break; |
| 714 | } |
| 715 | |
| 716 | memcpy(out, saved, 448 * sizeof(float)); |
| 717 | c->fdsp.vector_fmul_window(wout, saved + 448, buf, c->short_win, 64); |
| 718 | memcpy(wout + 128, buf + 64, 448 * sizeof(float)); |
| 719 | memcpy(saved, buf + 512, 448 * sizeof(float)); |
| 720 | memcpy(saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
| 721 | } |
| 722 | |
| 723 | return 0; |
| 724 | } |
| 725 | |
| 726 | // not borrowed from aacdec.c - the codec has original design after all |
| 727 | static int on2avc_reconstruct_channel(On2AVCContext *c, int channel, |
| 728 | AVFrame *dst, int offset) |
| 729 | { |
| 730 | int i; |
| 731 | float *out = (float*)dst->extended_data[channel] + offset; |
| 732 | float *in = c->coeffs[channel]; |
| 733 | float *saved = c->delay[channel]; |
| 734 | float *buf = c->mdct_buf; |
| 735 | float *temp = c->temp; |
| 736 | |
| 737 | switch (c->window_type) { |
| 738 | case WINDOW_TYPE_LONG_START: |
| 739 | case WINDOW_TYPE_LONG_STOP: |
| 740 | case WINDOW_TYPE_LONG: |
| 741 | c->mdct.imdct_half(&c->mdct, buf, in); |
| 742 | break; |
| 743 | case WINDOW_TYPE_8SHORT: |
| 744 | for (i = 0; i < ON2AVC_SUBFRAME_SIZE; i += ON2AVC_SUBFRAME_SIZE / 8) |
| 745 | c->mdct_small.imdct_half(&c->mdct_small, buf + i, in + i); |
| 746 | break; |
| 747 | } |
| 748 | |
| 749 | if ((c->prev_window_type == WINDOW_TYPE_LONG || |
| 750 | c->prev_window_type == WINDOW_TYPE_LONG_STOP) && |
| 751 | (c->window_type == WINDOW_TYPE_LONG || |
| 752 | c->window_type == WINDOW_TYPE_LONG_START)) { |
| 753 | c->fdsp.vector_fmul_window(out, saved, buf, c->long_win, 512); |
| 754 | } else { |
| 755 | float *wout = out + 448; |
| 756 | memcpy(out, saved, 448 * sizeof(float)); |
| 757 | |
| 758 | if (c->window_type == WINDOW_TYPE_8SHORT) { |
| 759 | c->fdsp.vector_fmul_window(wout + 0*128, saved + 448, buf + 0*128, c->short_win, 64); |
| 760 | c->fdsp.vector_fmul_window(wout + 1*128, buf + 0*128 + 64, buf + 1*128, c->short_win, 64); |
| 761 | c->fdsp.vector_fmul_window(wout + 2*128, buf + 1*128 + 64, buf + 2*128, c->short_win, 64); |
| 762 | c->fdsp.vector_fmul_window(wout + 3*128, buf + 2*128 + 64, buf + 3*128, c->short_win, 64); |
| 763 | c->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, c->short_win, 64); |
| 764 | memcpy(wout + 4*128, temp, 64 * sizeof(float)); |
| 765 | } else { |
| 766 | c->fdsp.vector_fmul_window(wout, saved + 448, buf, c->short_win, 64); |
| 767 | memcpy(wout + 128, buf + 64, 448 * sizeof(float)); |
| 768 | } |
| 769 | } |
| 770 | |
| 771 | // buffer update |
| 772 | switch (c->window_type) { |
| 773 | case WINDOW_TYPE_8SHORT: |
| 774 | memcpy(saved, temp + 64, 64 * sizeof(float)); |
| 775 | c->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, c->short_win, 64); |
| 776 | c->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, c->short_win, 64); |
| 777 | c->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, c->short_win, 64); |
| 778 | memcpy(saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
| 779 | break; |
| 780 | case WINDOW_TYPE_LONG_START: |
| 781 | memcpy(saved, buf + 512, 448 * sizeof(float)); |
| 782 | memcpy(saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
| 783 | break; |
| 784 | case WINDOW_TYPE_LONG_STOP: |
| 785 | case WINDOW_TYPE_LONG: |
| 786 | memcpy(saved, buf + 512, 512 * sizeof(float)); |
| 787 | break; |
| 788 | } |
| 789 | return 0; |
| 790 | } |
| 791 | |
| 792 | static int on2avc_decode_subframe(On2AVCContext *c, const uint8_t *buf, |
| 793 | int buf_size, AVFrame *dst, int offset) |
| 794 | { |
| 795 | GetBitContext gb; |
| 796 | int i, ret; |
| 797 | |
| 798 | init_get_bits(&gb, buf, buf_size * 8); |
| 799 | if (get_bits1(&gb)) { |
| 800 | av_log(c->avctx, AV_LOG_ERROR, "enh bit set\n"); |
| 801 | return AVERROR_INVALIDDATA; |
| 802 | } |
| 803 | c->prev_window_type = c->window_type; |
| 804 | c->window_type = get_bits(&gb, 3); |
| 805 | if (c->window_type >= WINDOW_TYPE_EXT4 && c->avctx->channels == 1) { |
| 806 | av_log(c->avctx, AV_LOG_ERROR, "stereo mode window for mono audio\n"); |
| 807 | return AVERROR_INVALIDDATA; |
| 808 | } |
| 809 | |
| 810 | c->band_start = c->modes[c->window_type].band_start; |
| 811 | c->num_windows = c->modes[c->window_type].num_windows; |
| 812 | c->num_bands = c->modes[c->window_type].num_bands; |
| 813 | c->is_long = (c->window_type != WINDOW_TYPE_8SHORT); |
| 814 | |
| 815 | c->grouping[0] = 1; |
| 816 | for (i = 1; i < c->num_windows; i++) |
| 817 | c->grouping[i] = !get_bits1(&gb); |
| 818 | |
| 819 | on2avc_read_ms_info(c, &gb); |
| 820 | for (i = 0; i < c->avctx->channels; i++) |
| 821 | if ((ret = on2avc_read_channel_data(c, &gb, i)) < 0) |
| 822 | return AVERROR_INVALIDDATA; |
| 823 | if (c->avctx->channels == 2 && c->ms_present) |
| 824 | on2avc_apply_ms(c); |
| 825 | if (c->window_type < WINDOW_TYPE_EXT4) { |
| 826 | for (i = 0; i < c->avctx->channels; i++) |
| 827 | on2avc_reconstruct_channel(c, i, dst, offset); |
| 828 | } else { |
| 829 | on2avc_reconstruct_stereo(c, dst, offset); |
| 830 | } |
| 831 | |
| 832 | return 0; |
| 833 | } |
| 834 | |
| 835 | static int on2avc_decode_frame(AVCodecContext * avctx, void *data, |
| 836 | int *got_frame_ptr, AVPacket *avpkt) |
| 837 | { |
| 838 | AVFrame *frame = data; |
| 839 | const uint8_t *buf = avpkt->data; |
| 840 | int buf_size = avpkt->size; |
| 841 | On2AVCContext *c = avctx->priv_data; |
| 842 | GetByteContext gb; |
| 843 | int num_frames = 0, frame_size, audio_off; |
| 844 | int ret; |
| 845 | |
| 846 | if (c->is_av500) { |
| 847 | /* get output buffer */ |
| 848 | frame->nb_samples = ON2AVC_SUBFRAME_SIZE; |
| 849 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { |
| 850 | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); |
| 851 | return ret; |
| 852 | } |
| 853 | |
| 854 | if ((ret = on2avc_decode_subframe(c, buf, buf_size, frame, 0)) < 0) |
| 855 | return ret; |
| 856 | } else { |
| 857 | bytestream2_init(&gb, buf, buf_size); |
| 858 | while (bytestream2_get_bytes_left(&gb) > 2) { |
| 859 | frame_size = bytestream2_get_le16(&gb); |
| 860 | if (!frame_size || frame_size > bytestream2_get_bytes_left(&gb)) { |
| 861 | av_log(avctx, AV_LOG_ERROR, "Invalid subframe size %d\n", |
| 862 | frame_size); |
| 863 | return AVERROR_INVALIDDATA; |
| 864 | } |
| 865 | num_frames++; |
| 866 | bytestream2_skip(&gb, frame_size); |
| 867 | } |
| 868 | if (!num_frames) { |
| 869 | av_log(avctx, AV_LOG_ERROR, "No subframes present\n"); |
| 870 | return AVERROR_INVALIDDATA; |
| 871 | } |
| 872 | |
| 873 | /* get output buffer */ |
| 874 | frame->nb_samples = ON2AVC_SUBFRAME_SIZE * num_frames; |
| 875 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { |
| 876 | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); |
| 877 | return ret; |
| 878 | } |
| 879 | |
| 880 | audio_off = 0; |
| 881 | bytestream2_init(&gb, buf, buf_size); |
| 882 | while (bytestream2_get_bytes_left(&gb) > 2) { |
| 883 | frame_size = bytestream2_get_le16(&gb); |
| 884 | if ((ret = on2avc_decode_subframe(c, gb.buffer, frame_size, |
| 885 | frame, audio_off)) < 0) |
| 886 | return ret; |
| 887 | audio_off += ON2AVC_SUBFRAME_SIZE; |
| 888 | bytestream2_skip(&gb, frame_size); |
| 889 | } |
| 890 | } |
| 891 | |
| 892 | *got_frame_ptr = 1; |
| 893 | |
| 894 | return buf_size; |
| 895 | } |
| 896 | |
| 897 | static av_cold void on2avc_free_vlcs(On2AVCContext *c) |
| 898 | { |
| 899 | int i; |
| 900 | |
| 901 | ff_free_vlc(&c->scale_diff); |
| 902 | for (i = 1; i < 16; i++) |
| 903 | ff_free_vlc(&c->cb_vlc[i]); |
| 904 | } |
| 905 | |
| 906 | static av_cold int on2avc_decode_init(AVCodecContext *avctx) |
| 907 | { |
| 908 | On2AVCContext *c = avctx->priv_data; |
| 909 | int i; |
| 910 | |
| 911 | if (avctx->channels > 2U) { |
| 912 | avpriv_request_sample(avctx, "Decoding more than 2 channels"); |
| 913 | return AVERROR_PATCHWELCOME; |
| 914 | } |
| 915 | |
| 916 | c->avctx = avctx; |
| 917 | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; |
| 918 | avctx->channel_layout = (avctx->channels == 2) ? AV_CH_LAYOUT_STEREO |
| 919 | : AV_CH_LAYOUT_MONO; |
| 920 | |
| 921 | c->is_av500 = (avctx->codec_tag == 0x500); |
| 922 | if (c->is_av500 && avctx->channels == 2) { |
| 923 | av_log(avctx, AV_LOG_ERROR, "0x500 version should be mono\n"); |
| 924 | return AVERROR_INVALIDDATA; |
| 925 | } |
| 926 | if (avctx->channels == 2) |
| 927 | av_log(avctx, AV_LOG_WARNING, |
| 928 | "Stereo mode support is not good, patch is welcome\n"); |
| 929 | |
| 930 | for (i = 0; i < 20; i++) |
| 931 | c->scale_tab[i] = ceil(pow(10.0, i * 0.1) * 16) / 32; |
| 932 | for (; i < 128; i++) |
| 933 | c->scale_tab[i] = ceil(pow(10.0, i * 0.1) * 0.5); |
| 934 | |
| 935 | if (avctx->sample_rate < 32000 || avctx->channels == 1) |
| 936 | memcpy(c->long_win, ff_on2avc_window_long_24000, |
| 937 | 1024 * sizeof(*c->long_win)); |
| 938 | else |
| 939 | memcpy(c->long_win, ff_on2avc_window_long_32000, |
| 940 | 1024 * sizeof(*c->long_win)); |
| 941 | memcpy(c->short_win, ff_on2avc_window_short, 128 * sizeof(*c->short_win)); |
| 942 | |
| 943 | c->modes = (avctx->sample_rate <= 40000) ? ff_on2avc_modes_40 |
| 944 | : ff_on2avc_modes_44; |
| 945 | c->wtf = (avctx->sample_rate <= 40000) ? wtf_40 |
| 946 | : wtf_44; |
| 947 | |
| 948 | ff_mdct_init(&c->mdct, 11, 1, 1.0 / (32768.0 * 1024.0)); |
| 949 | ff_mdct_init(&c->mdct_half, 10, 1, 1.0 / (32768.0 * 512.0)); |
| 950 | ff_mdct_init(&c->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0)); |
| 951 | ff_fft_init(&c->fft128, 6, 0); |
| 952 | ff_fft_init(&c->fft256, 7, 0); |
| 953 | ff_fft_init(&c->fft512, 8, 1); |
| 954 | ff_fft_init(&c->fft1024, 9, 1); |
| 955 | avpriv_float_dsp_init(&c->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
| 956 | |
| 957 | if (init_vlc(&c->scale_diff, 9, ON2AVC_SCALE_DIFFS, |
| 958 | ff_on2avc_scale_diff_bits, 1, 1, |
| 959 | ff_on2avc_scale_diff_codes, 4, 4, 0)) { |
| 960 | av_log(avctx, AV_LOG_ERROR, "Cannot init VLC\n"); |
| 961 | return AVERROR(ENOMEM); |
| 962 | } |
| 963 | for (i = 1; i < 9; i++) { |
| 964 | int idx = i - 1; |
| 965 | if (ff_init_vlc_sparse(&c->cb_vlc[i], 9, ff_on2avc_quad_cb_elems[idx], |
| 966 | ff_on2avc_quad_cb_bits[idx], 1, 1, |
| 967 | ff_on2avc_quad_cb_codes[idx], 4, 4, |
| 968 | ff_on2avc_quad_cb_syms[idx], 2, 2, 0)) { |
| 969 | av_log(avctx, AV_LOG_ERROR, "Cannot init VLC\n"); |
| 970 | on2avc_free_vlcs(c); |
| 971 | return AVERROR(ENOMEM); |
| 972 | } |
| 973 | } |
| 974 | for (i = 9; i < 16; i++) { |
| 975 | int idx = i - 9; |
| 976 | if (ff_init_vlc_sparse(&c->cb_vlc[i], 9, ff_on2avc_pair_cb_elems[idx], |
| 977 | ff_on2avc_pair_cb_bits[idx], 1, 1, |
| 978 | ff_on2avc_pair_cb_codes[idx], 2, 2, |
| 979 | ff_on2avc_pair_cb_syms[idx], 2, 2, 0)) { |
| 980 | av_log(avctx, AV_LOG_ERROR, "Cannot init VLC\n"); |
| 981 | on2avc_free_vlcs(c); |
| 982 | return AVERROR(ENOMEM); |
| 983 | } |
| 984 | } |
| 985 | |
| 986 | return 0; |
| 987 | } |
| 988 | |
| 989 | static av_cold int on2avc_decode_close(AVCodecContext *avctx) |
| 990 | { |
| 991 | On2AVCContext *c = avctx->priv_data; |
| 992 | |
| 993 | ff_mdct_end(&c->mdct); |
| 994 | ff_mdct_end(&c->mdct_half); |
| 995 | ff_mdct_end(&c->mdct_small); |
| 996 | ff_fft_end(&c->fft128); |
| 997 | ff_fft_end(&c->fft256); |
| 998 | ff_fft_end(&c->fft512); |
| 999 | ff_fft_end(&c->fft1024); |
| 1000 | |
| 1001 | on2avc_free_vlcs(c); |
| 1002 | |
| 1003 | return 0; |
| 1004 | } |
| 1005 | |
| 1006 | |
| 1007 | AVCodec ff_on2avc_decoder = { |
| 1008 | .name = "on2avc", |
| 1009 | .long_name = NULL_IF_CONFIG_SMALL("On2 Audio for Video Codec"), |
| 1010 | .type = AVMEDIA_TYPE_AUDIO, |
| 1011 | .id = AV_CODEC_ID_ON2AVC, |
| 1012 | .priv_data_size = sizeof(On2AVCContext), |
| 1013 | .init = on2avc_decode_init, |
| 1014 | .decode = on2avc_decode_frame, |
| 1015 | .close = on2avc_decode_close, |
| 1016 | .capabilities = CODEC_CAP_DR1, |
| 1017 | .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, |
| 1018 | AV_SAMPLE_FMT_NONE }, |
| 1019 | }; |