libFDK/src/dct.cpp

   1
   2 /* -----------------------------------------------------------------------------------------------------------
   3 Software License for The Fraunhofer FDK AAC Codec Library for Android
   4
   5 © Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
   6   All rights reserved.
   7
   8  1.    INTRODUCTION
   9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
  10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
  11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
  12
  13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
  14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
  15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
  16 of the MPEG specifications.
  17
  18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
  19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
  20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
  21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
  22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
  23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
  24
  25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
  26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
  27 applications information and documentation.
  28
  29 2.    COPYRIGHT LICENSE
  30
  31 Redistribution and use in source and binary forms, with or without modification, are permitted without
  32 payment of copyright license fees provided that you satisfy the following conditions:
  33
  34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
  35 your modifications thereto in source code form.
  36
  37 You must retain the complete text of this software license in the documentation and/or other materials
  38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
  39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
  40 modifications thereto to recipients of copies in binary form.
  41
  42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
  43 prior written permission.
  44
  45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
  46 software or your modifications thereto.
  47
  48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
  49 and the date of any change. For modified versions of the FDK AAC Codec, the term
  50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
  51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
  52
  53 3.    NO PATENT LICENSE
  54
  55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
  56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
  57 respect to this software.
  58
  59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
  60 by appropriate patent licenses.
  61
  62 4.    DISCLAIMER
  63
  64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
  65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
  66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
  68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
  69 or business interruption, however caused and on any theory of liability, whether in contract, strict
  70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
  71 advised of the possibility of such damage.
  72
  73 5.    CONTACT INFORMATION
  74
  75 Fraunhofer Institute for Integrated Circuits IIS
  76 Attention: Audio and Multimedia Departments - FDK AAC LL
  77 Am Wolfsmantel 33
  78 91058 Erlangen, Germany
  79
  80 www.iis.fraunhofer.de/amm
  81 amm-info@iis.fraunhofer.de
  82 ----------------------------------------------------------------------------------------------------------- */
  83
  84 /*!
  85   \file   dct.cpp
  86   \brief  DCT Implementations
  87   Library functions to calculate standard DCTs. This will most likely be replaced by hand-optimized
  88   functions for the specific target processor.
  89
  90   Three different implementations of the dct type II and the dct type III transforms are provided.
  91
  92   By default implementations which are based on a single, standard complex FFT-kernel are used (dctII_f() and dctIII_f()).
  93   These are specifically helpful in cases where optimized FFT libraries are already available. The FFT used in these
  94   implementation is FFT rad2 from FDK_tools.
  95
  96   Of course, one might also use DCT-libraries should they be available. The DCT and DST
  97   type IV implementations are only available in a version based on a complex FFT kernel.
  98 */
  99
 100 #include "dct.h"
 101
 102
 103 #include "FDK_tools_rom.h"
 104 #include "fft.h"
 105
 106
 107 #if defined(__arm__)
 108 #include "arm/dct_arm.cpp"
 109 #endif
 110
 111
 112 #if !defined(FUNCTION_dct_III)
 113 void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */
 114              FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
 115              int L,          /*!< lenght of transform */
 116              int *pDat_e
 117              )
 118 {
 119   FDK_ASSERT(L == 64 || L == 32);
 120   int  i;
 121   FIXP_DBL xr, accu1, accu2;
 122   int inc;
 123   int M = L>>1;
 124   int ld_M;
 125
 126   if (L == 64)  ld_M = 5;
 127   else          ld_M = 4;
 128
 129   /* This loop performs multiplication for index i (i*inc) */
 130   inc = (64/2) >> ld_M; /* 64/L */
 131
 132   FIXP_DBL *pTmp_0 = &tmp[2];
 133   FIXP_DBL *pTmp_1 = &tmp[(M-1)*2];
 134
 135   for(i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
 136
 137     FIXP_DBL accu3,accu4,accu5,accu6;
 138
 139     cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle_L64[i*inc]);
 140     cplxMultDiv2(&accu4, &accu3, pDat[M+i], pDat[M-i], sin_twiddle_L64[(M-i)*inc]);
 141     accu3 >>= 1; accu4 >>= 1;
 142
 143     /* This method is better for ARM926, that uses operand2 shifted right by 1 always */
 144     cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1>>1)), ((accu2>>1) + accu4), sin_twiddle_L64[(4*i)*inc]);
 145     xr = (accu1>>1) + accu3;
 146     pTmp_0[0] = (xr>>1) - accu5;
 147     pTmp_1[0] = (xr>>1) + accu5;
 148
 149     xr = (accu2>>1) - accu4;
 150     pTmp_0[1] =  (xr>>1) - accu6;
 151     pTmp_1[1] = -((xr>>1) + accu6);
 152
 153   }
 154
 155   xr     = fMultDiv2(pDat[M], sin_twiddle_L64[64/2].v.re );/* cos((PI/(2*L))*M); */
 156   tmp[0] = ((pDat[0]>>1) + xr)>>1;
 157   tmp[1] = ((pDat[0]>>1) - xr)>>1;
 158
 159   cplxMultDiv2(&accu2, &accu1, pDat[L - (M/2)], pDat[M/2], sin_twiddle_L64[64/4]);
 160   tmp[M]   = accu1>>1;
 161   tmp[M+1] = accu2>>1;
 162
 163   /* dit_fft expects 1 bit scaled input values */
 164   fft(M, tmp, pDat_e);
 165
 166   /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */
 167   pTmp_1 = &tmp[L];
 168   for (i = M>>1; i--;)
 169   {
 170     FIXP_DBL tmp1, tmp2, tmp3, tmp4;
 171     tmp1 = *tmp++;
 172     tmp2 = *tmp++;
 173     tmp3 = *--pTmp_1;
 174     tmp4 = *--pTmp_1;
 175     *pDat++ = tmp1;
 176     *pDat++ = tmp3;
 177     *pDat++ = tmp2;
 178     *pDat++ = tmp4;
 179   }
 180
 181   *pDat_e += 2;
 182 }
 183 #endif
 184
 185 #if !defined(FUNCTION_dct_II)
 186 void dct_II(FIXP_DBL *pDat, /*!< pointer to input/output */
 187             FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
 188             int L,          /*!< lenght of transform */
 189             int *pDat_e
 190             )
 191 {
 192     FDK_ASSERT(L == 64 || L == 32);
 193     FIXP_DBL accu1,accu2;
 194     FIXP_DBL *pTmp_0, *pTmp_1;
 195
 196     int i;
 197     int inc;
 198     int M =  L>>1;
 199     int ld_M;
 200
 201     FDK_ASSERT(L == 64 || L == 32);
 202     ld_M = 4 + (L >> 6);  /* L=64: 5,  L=32: 4 */
 203
 204     inc = (64/2) >> ld_M; /* L=64: 1,  L=32: 2 */
 205
 206     FIXP_DBL *pdat  = &pDat[0];
 207     FIXP_DBL accu3, accu4;
 208     pTmp_0 = &tmp[0];
 209     pTmp_1 = &tmp[L-1];
 210     for (i = M>>1; i--; )
 211     {
 212       accu1 = *pdat++;
 213       accu2 = *pdat++;
 214       accu3 = *pdat++;
 215       accu4 = *pdat++;
 216       accu1 >>= 1;
 217       accu2 >>= 1;
 218       accu3 >>= 1;
 219       accu4 >>= 1;
 220       *pTmp_0++ = accu1;
 221       *pTmp_0++ = accu3;
 222       *pTmp_1-- = accu2;
 223       *pTmp_1-- = accu4;
 224     }
 225
 226
 227     fft(M, tmp, pDat_e);
 228
 229     pTmp_0 = &tmp[2];
 230     pTmp_1 = &tmp[(M-1)*2];
 231
 232     for (i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
 233
 234       FIXP_DBL a1,a2;
 235       FIXP_DBL accu3, accu4;
 236
 237       a1 = ((pTmp_0[1]>>1) + (pTmp_1[1]>>1));
 238       a2 = ((pTmp_1[0]>>1) - (pTmp_0[0]>>1));
 239
 240       cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle_L64[(4*i)*inc]);
 241       accu1<<=1; accu2<<=1;
 242
 243       a1 = ((pTmp_0[0]>>1) + (pTmp_1[0]>>1));
 244       a2 = ((pTmp_0[1]>>1) - (pTmp_1[1]>>1));
 245
 246       cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), sin_twiddle_L64[i*inc]);
 247       pDat[L - i] = accu4;
 248       pDat[i]     = accu3;
 249
 250       cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), sin_twiddle_L64[(M-i)*inc]);
 251       pDat[M + i] = accu4;
 252       pDat[M - i] = accu3;
 253
 254     }
 255
 256     cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M+1], sin_twiddle_L64[(M/2)*inc]);
 257     pDat[L - (M/2)] = accu2;
 258     pDat[M/2]       = accu1;
 259
 260     pDat[0] = (tmp[0]>>1)+(tmp[1]>>1);
 261     pDat[M] =  fMult(((tmp[0]>>1)-(tmp[1]>>1)), sin_twiddle_L64[64/2].v.re);/* cos((PI/(2*L))*M); */
 262
 263     *pDat_e += 2;
 264 }
 265 #endif
 266
 267 static
 268 void getTables(const FIXP_WTP **twiddle, const FIXP_STP **sin_twiddle, int *sin_step, int length)
 269 {
 270   int ld2_length;
 271
 272  /* Get ld2 of length - 2 + 1
 273      -2: because first table entry is window of size 4
 274      +1: because we already include +1 because of ceil(log2(length)) */
 275   ld2_length = DFRACT_BITS-1-fNormz((FIXP_DBL)length) - 1;
 276
 277   /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */
 278   switch ( (length) >> (ld2_length-1) ) {
 279     case 0x4: /* radix 2 */
 280       *sin_twiddle = SineTable512;
 281       *sin_step = 1<<(9 - ld2_length);
 282       *twiddle = windowSlopes[0][0][ld2_length-1];
 283       break;
 284     case 0x7: /* 10 ms */
 285       *sin_twiddle = SineTable480;
 286       *sin_step = 1<<(8 - ld2_length);
 287       *twiddle = windowSlopes[0][1][ld2_length];
 288       break;
 289     default:
 290       *sin_twiddle = NULL;
 291       *sin_step = 0;
 292       *twiddle = NULL;
 293       break;
 294   }
 295
 296   FDK_ASSERT(*twiddle != NULL);
 297
 298   FDK_ASSERT(*sin_step > 0);
 299
 300 }
 301
 302 #if !defined(FUNCTION_dct_IV)
 303
 304 void dct_IV(FIXP_DBL *pDat,
 305             int L,
 306             int *pDat_e)
 307 {
 308   int sin_step = 0;
 309   int M = L >> 1;
 310
 311   const FIXP_WTP *twiddle;
 312   const FIXP_STP *sin_twiddle;
 313
 314   FDK_ASSERT(L >= 4);
 315
 316   getTables(&twiddle, &sin_twiddle, &sin_step, L);
 317
 318 #ifdef FUNCTION_dct_IV_func1
 319   if (M>=4 && (M&3) == 0) {
 320      /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
 321     dct_IV_func1(M>>2, twiddle,  &pDat[0], &pDat[L-1]);
 322   } else
 323 #endif /* FUNCTION_dct_IV_func1 */
 324   {
 325     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
 326     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
 327     register int i;
 328
 329     /* 29 cycles on ARM926 */
 330     for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
 331     {
 332       register FIXP_DBL accu1,accu2,accu3,accu4;
 333
 334       accu1 = pDat_1[1]; accu2 = pDat_0[0];
 335       accu3 = pDat_0[1]; accu4 = pDat_1[0];
 336
 337       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 338       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
 339
 340       pDat_0[0] = accu2; pDat_0[1] = accu1;
 341       pDat_1[0] = accu4; pDat_1[1] = -accu3;
 342     }
 343     if (M&1)
 344     {
 345       register FIXP_DBL accu1,accu2;
 346
 347       accu1 = pDat_1[1]; accu2 = pDat_0[0];
 348
 349       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 350
 351       pDat_0[0] = accu2; pDat_0[1] = accu1;
 352     }
 353   }
 354
 355   fft(M, pDat, pDat_e);
 356
 357 #ifdef FUNCTION_dct_IV_func2
 358   if (M>=4 && (M&3) == 0) {
 359      /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
 360     dct_IV_func2(M>>2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
 361   } else
 362 #endif /* FUNCTION_dct_IV_func2 */
 363   {
 364     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
 365     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
 366     register FIXP_DBL accu1,accu2,accu3,accu4;
 367     int idx, i;
 368
 369     /* Sin and Cos values are 0.0f and 1.0f */
 370     accu1 = pDat_1[0];
 371     accu2 = pDat_1[1];
 372
 373     pDat_1[1] = -(pDat_0[1]>>1);
 374     pDat_0[0] = (pDat_0[0]>>1);
 375
 376
 377     /* 28 cycles for ARM926 */
 378     for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
 379     {
 380       FIXP_STP twd = sin_twiddle[idx];
 381       cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
 382       pDat_0[1] =  accu3;
 383       pDat_1[0] =  accu4;
 384
 385       pDat_0+=2;
 386       pDat_1-=2;
 387
 388       cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
 389
 390       accu1 = pDat_1[0];
 391       accu2 = pDat_1[1];
 392
 393       pDat_1[1] = -accu3;
 394       pDat_0[0] =  accu4;
 395     }
 396
 397     if ( (M&1) == 0 )
 398     {
 399       /* Last Sin and Cos value pair are the same */
 400       accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
 401       accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
 402
 403       pDat_1[0] = accu1 + accu2;
 404       pDat_0[1] = accu1 - accu2;
 405     }
 406   }
 407
 408   /* Add twiddeling scale. */
 409   *pDat_e += 2;
 410 }
 411 #endif /* defined (FUNCTION_dct_IV) */
 412
 413 #if !defined(FUNCTION_dst_IV)
 414 void dst_IV(FIXP_DBL *pDat,
 415             int L,
 416             int *pDat_e )
 417 {
 418   int sin_step = 0;
 419   int M = L >> 1;
 420
 421   const FIXP_WTP *twiddle;
 422   const FIXP_STP *sin_twiddle;
 423
 424 #ifdef DSTIV2_ENABLE
 425   if (L == 2) {
 426     const FIXP_STP tab = STCP(0x7641AF3D, 0x30FB9452);
 427     FIXP_DBL tmp1, tmp2;
 428
 429     cplxMultDiv2(&tmp2, &tmp1, pDat[0], pDat[1], tab);
 430
 431     pDat[0] = tmp1;
 432     pDat[1] = tmp2;
 433
 434     *pDat_e += 1;
 435
 436     return;
 437   }
 438 #else
 439   FDK_ASSERT(L >= 4);
 440 #endif
 441
 442   getTables(&twiddle, &sin_twiddle, &sin_step, L);
 443
 444 #ifdef FUNCTION_dst_IV_func1
 445   if ( (M>=4) && ((M&3) == 0) ) {
 446     dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
 447   } else
 448 #endif
 449   {
 450     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
 451     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
 452
 453     register int i;
 454
 455     /* 34 cycles on ARM926 */
 456     for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
 457     {
 458       register FIXP_DBL accu1,accu2,accu3,accu4;
 459
 460       accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
 461       accu3 =  pDat_0[1]; accu4 = -pDat_1[0];
 462
 463       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 464       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
 465
 466       pDat_0[0] = accu2; pDat_0[1] = accu1;
 467       pDat_1[0] = accu4; pDat_1[1] = -accu3;
 468     }
 469     if (M&1)
 470     {
 471       register FIXP_DBL accu1,accu2;
 472
 473       accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
 474
 475       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 476
 477       pDat_0[0] = accu2; pDat_0[1] = accu1;
 478     }
 479   }
 480
 481   fft(M, pDat, pDat_e);
 482
 483 #ifdef FUNCTION_dst_IV_func2
 484   if ( (M>=4) && ((M&3) == 0) ) {
 485     dst_IV_func2(M>>2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1], sin_step);
 486   } else
 487 #endif /* FUNCTION_dst_IV_func2 */
 488   {
 489     FIXP_DBL *RESTRICT pDat_0;
 490     FIXP_DBL *RESTRICT pDat_1;
 491     register FIXP_DBL accu1,accu2,accu3,accu4;
 492     int idx, i;
 493
 494     pDat_0 = &pDat[0];
 495     pDat_1 = &pDat[L - 2];
 496
 497     /* Sin and Cos values are 0.0f and 1.0f */
 498     accu1 = pDat_1[0];
 499     accu2 = pDat_1[1];
 500
 501     pDat_1[1] = -(pDat_0[0]>>1);
 502     pDat_0[0] = (pDat_0[1]>>1);
 503
 504     for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
 505     {
 506       FIXP_STP twd = sin_twiddle[idx];
 507
 508       cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
 509       pDat_1[0] =  -accu3;
 510       pDat_0[1] =  -accu4;
 511
 512       pDat_0+=2;
 513       pDat_1-=2;
 514
 515       cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
 516
 517       accu1 = pDat_1[0];
 518       accu2 = pDat_1[1];
 519
 520       pDat_0[0] =  accu3;
 521       pDat_1[1] = -accu4;
 522     }
 523
 524     if ( (M&1) == 0 )
 525     {
 526       /* Last Sin and Cos value pair are the same */
 527       accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
 528       accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
 529
 530       pDat_0[1] = - accu1 - accu2;
 531       pDat_1[0] =   accu2 - accu1;
 532     }
 533   }
 534
 535   /* Add twiddeling scale. */
 536   *pDat_e += 2;
 537 }
 538 #endif /* !defined(FUNCTION_dst_IV) */
 539
 540