libFDK/src/arm/qmf_arm.cpp

   1
   2 /* -----------------------------------------------------------------------------------------------------------
   3 Software License for The Fraunhofer FDK AAC Codec Library for Android
   4
   5 © Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
   6   All rights reserved.
   7
   8  1.    INTRODUCTION
   9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
  10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
  11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
  12
  13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
  14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
  15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
  16 of the MPEG specifications.
  17
  18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
  19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
  20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
  21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
  22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
  23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
  24
  25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
  26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
  27 applications information and documentation.
  28
  29 2.    COPYRIGHT LICENSE
  30
  31 Redistribution and use in source and binary forms, with or without modification, are permitted without
  32 payment of copyright license fees provided that you satisfy the following conditions:
  33
  34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
  35 your modifications thereto in source code form.
  36
  37 You must retain the complete text of this software license in the documentation and/or other materials
  38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
  39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
  40 modifications thereto to recipients of copies in binary form.
  41
  42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
  43 prior written permission.
  44
  45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
  46 software or your modifications thereto.
  47
  48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
  49 and the date of any change. For modified versions of the FDK AAC Codec, the term
  50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
  51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
  52
  53 3.    NO PATENT LICENSE
  54
  55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
  56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
  57 respect to this software.
  58
  59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
  60 by appropriate patent licenses.
  61
  62 4.    DISCLAIMER
  63
  64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
  65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
  66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
  68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
  69 or business interruption, however caused and on any theory of liability, whether in contract, strict
  70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
  71 advised of the possibility of such damage.
  72
  73 5.    CONTACT INFORMATION
  74
  75 Fraunhofer Institute for Integrated Circuits IIS
  76 Attention: Audio and Multimedia Departments - FDK AAC LL
  77 Am Wolfsmantel 33
  78 91058 Erlangen, Germany
  79
  80 www.iis.fraunhofer.de/amm
  81 amm-info@iis.fraunhofer.de
  82 ----------------------------------------------------------------------------------------------------------- */
  83
  84 #if (QMF_NO_POLY==5)
  85
  86 #define FUNCTION_qmfForwardModulationLP_odd
  87
  88 #ifdef FUNCTION_qmfForwardModulationLP_odd
  89 static void
  90 qmfForwardModulationLP_odd( HANDLE_QMF_FILTER_BANK anaQmf, /*!< Handle of Qmf Analysis Bank  */
  91                             const FIXP_QMF *timeIn,        /*!< Time Signal */
  92                             FIXP_QMF *rSubband )           /*!< Real Output */
  93 {
  94   int i;
  95   int L = anaQmf->no_channels;
  96   int M = L>>1;
  97   int shift = (anaQmf->no_channels>>6) + 1;
  98   int rSubband_e = 0;
  99
 100   FIXP_QMF *rSubbandPtr0 = &rSubband[M+0];                /* runs with increment */
 101   FIXP_QMF *rSubbandPtr1 = &rSubband[M-1];                /* runs with decrement */
 102   FIXP_QMF *timeIn0 = (FIXP_DBL *) &timeIn[0];            /* runs with increment */
 103   FIXP_QMF *timeIn1 = (FIXP_DBL *) &timeIn[L];            /* runs with increment */
 104   FIXP_QMF *timeIn2 = (FIXP_DBL *) &timeIn[L-1];          /* runs with decrement */
 105   FIXP_QMF *timeIn3 = (FIXP_DBL *) &timeIn[2*L-1];        /* runs with decrement */
 106
 107   for (i = 0; i < M; i++)
 108   {
 109     *rSubbandPtr0++ = (*timeIn2-- >> 1) - (*timeIn0++ >> shift);
 110     *rSubbandPtr1-- = (*timeIn1++ >> 1) + (*timeIn3-- >> shift);
 111   }
 112
 113   dct_IV(rSubband,L, &rSubband_e);
 114 }
 115 #endif /* FUNCTION_qmfForwardModulationLP_odd */
 116
 117
 118 /* NEON optimized QMF currently builts only with RVCT toolchain */
 119
 120 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__)
 121
 122 #if (SAMPLE_BITS == 16)
 123 #define FUNCTION_qmfAnaPrototypeFirSlot
 124 #endif
 125
 126 #ifdef FUNCTION_qmfAnaPrototypeFirSlot
 127
 128 #if defined(__GNUC__)   /* cppp replaced: elif */
 129
 130 inline INT SMULBB (const SHORT a, const LONG b)
 131 {
 132   INT result ;
 133   __asm__ ("smulbb %0, %1, %2"
 134      : "=r" (result)
 135      : "r" (a), "r" (b)) ;
 136   return result ;
 137 }
 138 inline INT SMULBT (const SHORT a, const LONG b)
 139 {
 140   INT result ;
 141   __asm__ ("smulbt %0, %1, %2"
 142      : "=r" (result)
 143      : "r" (a), "r" (b)) ;
 144   return result ;
 145 }
 146
 147 inline INT SMLABB(const LONG accu, const SHORT a, const LONG b)
 148 {
 149   INT result ;
 150   __asm__ ("smlabb %0, %1, %2,%3"
 151      : "=r" (result)
 152      : "r" (a), "r" (b), "r" (accu)) ;
 153   return result;
 154 }
 155 inline INT SMLABT(const LONG accu, const SHORT a, const LONG b)
 156 {
 157   INT result ;
 158   __asm__ ("smlabt %0, %1, %2,%3"
 159      : "=r" (result)
 160      : "r" (a), "r" (b), "r" (accu)) ;
 161   return result;
 162 }
 163 #endif /* compiler selection  */
 164
 165
 166 void qmfAnaPrototypeFirSlot( FIXP_QMF *analysisBuffer,
 167                              int       no_channels,             /*!< Number channels of analysis filter */
 168                              const FIXP_PFT *p_filter,
 169                              int       p_stride,                /*!< Stide of analysis filter    */
 170                              FIXP_QAS *RESTRICT pFilterStates
 171                             )
 172 {
 173   LONG *p_flt = (LONG *) p_filter;
 174   LONG flt;
 175   FIXP_QMF *RESTRICT pData_0 = analysisBuffer + 2*no_channels - 1;
 176   FIXP_QMF *RESTRICT pData_1 = analysisBuffer;
 177
 178   FIXP_QAS *RESTRICT sta_0 = (FIXP_QAS *)pFilterStates;
 179   FIXP_QAS *RESTRICT sta_1 = (FIXP_QAS *)pFilterStates + (2*QMF_NO_POLY*no_channels) - 1;
 180
 181   FIXP_DBL accu0, accu1;
 182   FIXP_QAS sta0, sta1;
 183
 184   int staStep1 =  no_channels<<1;
 185   int staStep2 = (no_channels<<3) - 1; /* Rewind one less */
 186
 187   if (p_stride == 1)
 188   {
 189     /* FIR filter 0 */
 190     flt = *p_flt++;
 191     sta1 = *sta_1;  sta_1 -= staStep1;
 192     accu1 = SMULBB(        sta1, flt);
 193     sta1 = *sta_1;  sta_1 -= staStep1;
 194     accu1 = SMLABT( accu1, sta1, flt);
 195
 196     flt = *p_flt++;
 197     sta1 = *sta_1;  sta_1 -= staStep1;
 198     accu1 = SMLABB( accu1, sta1, flt);
 199     sta1 = *sta_1;  sta_1 -= staStep1;
 200     accu1 = SMLABT( accu1, sta1, flt);
 201
 202     flt = *p_flt++;
 203     sta1 = *sta_1;  sta_1 += staStep2;
 204     accu1 = SMLABB( accu1, sta1, flt);
 205     *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 206
 207     /* FIR filters 1..63 127..65 or 1..31 63..33 */
 208     no_channels >>= 1;
 209     for (; --no_channels; )
 210     {
 211       sta0 = *sta_0; sta_0 += staStep1;  /* 1,3,5, ... 29/61 */
 212       sta1 = *sta_1; sta_1 -= staStep1;
 213       accu0 = SMULBT(        sta0, flt);
 214       accu1 = SMULBT(        sta1, flt);
 215
 216       flt = *p_flt++;
 217       sta0 = *sta_0; sta_0 += staStep1;
 218       sta1 = *sta_1; sta_1 -= staStep1;
 219       accu0 = SMLABB( accu0, sta0, flt);
 220       accu1 = SMLABB( accu1, sta1, flt);
 221
 222       sta0 = *sta_0; sta_0 += staStep1;
 223       sta1 = *sta_1; sta_1 -= staStep1;
 224       accu0 = SMLABT( accu0, sta0, flt);
 225       accu1 = SMLABT( accu1, sta1, flt);
 226
 227       flt = *p_flt++;
 228       sta0 = *sta_0; sta_0 += staStep1;
 229       sta1 = *sta_1; sta_1 -= staStep1;
 230       accu0 = SMLABB( accu0, sta0, flt);
 231       accu1 = SMLABB( accu1, sta1, flt);
 232
 233       sta0 = *sta_0; sta_0 -= staStep2;
 234       sta1 = *sta_1; sta_1 += staStep2;
 235       accu0 = SMLABT( accu0, sta0, flt);
 236       accu1 = SMLABT( accu1, sta1, flt);
 237
 238       *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 239       *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 240
 241       /* Same sequence as above, but mix B=bottom with T=Top */
 242
 243       flt = *p_flt++;
 244       sta0 = *sta_0; sta_0 += staStep1;  /* 2,4,6, ... 30/62 */
 245       sta1 = *sta_1; sta_1 -= staStep1;
 246       accu0 = SMULBB(        sta0, flt);
 247       accu1 = SMULBB(        sta1, flt);
 248
 249       sta0 = *sta_0; sta_0 += staStep1;
 250       sta1 = *sta_1; sta_1 -= staStep1;
 251       accu0 = SMLABT( accu0, sta0, flt);
 252       accu1 = SMLABT( accu1, sta1, flt);
 253
 254       flt = *p_flt++;
 255       sta0 = *sta_0; sta_0 += staStep1;
 256       sta1 = *sta_1; sta_1 -= staStep1;
 257       accu0 = SMLABB( accu0, sta0, flt);
 258       accu1 = SMLABB( accu1, sta1, flt);
 259
 260       sta0 = *sta_0; sta_0 += staStep1;
 261       sta1 = *sta_1; sta_1 -= staStep1;
 262       accu0 = SMLABT( accu0, sta0, flt);
 263       accu1 = SMLABT( accu1, sta1, flt);
 264
 265       flt = *p_flt++;
 266       sta0 = *sta_0; sta_0 -= staStep2;
 267       sta1 = *sta_1; sta_1 += staStep2;
 268       accu0 = SMLABB( accu0, sta0, flt);
 269       accu1 = SMLABB( accu1, sta1, flt);
 270
 271       *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 272       *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 273     }
 274
 275     /* FIR filter 31/63 and 33/65 */
 276     sta0 = *sta_0; sta_0 += staStep1;
 277     sta1 = *sta_1; sta_1 -= staStep1;
 278     accu0 = SMULBT(        sta0, flt);
 279     accu1 = SMULBT(        sta1, flt);
 280
 281     flt = *p_flt++;
 282     sta0 = *sta_0; sta_0 += staStep1;
 283     sta1 = *sta_1; sta_1 -= staStep1;
 284     accu0 = SMLABB( accu0, sta0, flt);
 285     accu1 = SMLABB( accu1, sta1, flt);
 286
 287     sta0 = *sta_0; sta_0 += staStep1;
 288     sta1 = *sta_1; sta_1 -= staStep1;
 289     accu0 = SMLABT( accu0, sta0, flt);
 290     accu1 = SMLABT( accu1, sta1, flt);
 291
 292     flt = *p_flt++;
 293     sta0 = *sta_0; sta_0 += staStep1;
 294     sta1 = *sta_1; sta_1 -= staStep1;
 295     accu0 = SMLABB( accu0, sta0, flt);
 296     accu1 = SMLABB( accu1, sta1, flt);
 297
 298     sta0 = *sta_0; sta_0 -= staStep2;
 299     sta1 = *sta_1; sta_1 += staStep2;
 300     accu0 = SMLABT( accu0, sta0, flt);
 301     accu1 = SMLABT( accu1, sta1, flt);
 302
 303     *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 304     *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 305
 306     /* FIR filter 32/64 */
 307     flt = *p_flt++;
 308     sta0 = *sta_0; sta_0 += staStep1;
 309     sta1 = *sta_1; sta_1 -= staStep1;
 310     accu0 = SMULBB(        sta0, flt);
 311     accu1 = SMULBB(        sta1, flt);
 312
 313     sta0 = *sta_0; sta_0 += staStep1;
 314     sta1 = *sta_1; sta_1 -= staStep1;
 315     accu0 = SMLABT( accu0, sta0, flt);
 316     accu1 = SMLABT( accu1, sta1, flt);
 317
 318     flt = *p_flt++;
 319     sta0 = *sta_0; sta_0 += staStep1;
 320     sta1 = *sta_1; sta_1 -= staStep1;
 321     accu0 = SMLABB( accu0, sta0, flt);
 322     accu1 = SMLABB( accu1, sta1, flt);
 323
 324     sta0 = *sta_0; sta_0 += staStep1;
 325     sta1 = *sta_1; sta_1 -= staStep1;
 326     accu0 = SMLABT( accu0, sta0, flt);
 327     accu1 = SMLABT( accu1, sta1, flt);
 328
 329     flt = *p_flt;
 330     sta0 = *sta_0;
 331     sta1 = *sta_1;
 332     accu0 = SMLABB( accu0, sta0, flt);
 333     accu1 = SMLABB( accu1, sta1, flt);
 334
 335     *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 336     *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 337   }
 338   else
 339   {
 340     int pfltStep = QMF_NO_POLY * (p_stride-1);
 341
 342     flt = p_flt[0];
 343     sta1 = *sta_1;  sta_1 -= staStep1;
 344     accu1 = SMULBB(        sta1, flt);
 345     sta1 = *sta_1;  sta_1 -= staStep1;
 346     accu1 = SMLABT( accu1, sta1, flt);
 347
 348     flt = p_flt[1];
 349     sta1 = *sta_1;  sta_1 -= staStep1;
 350     accu1 = SMLABB( accu1, sta1, flt);
 351     sta1 = *sta_1;  sta_1 -= staStep1;
 352     accu1 = SMLABT( accu1, sta1, flt);
 353
 354     flt = p_flt[2]; p_flt += pfltStep;
 355     sta1 = *sta_1;  sta_1 += staStep2;
 356     accu1 = SMLABB( accu1, sta1, flt);
 357     *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 358
 359     /* FIR filters 1..63 127..65 or 1..31 63..33 */
 360     for (; --no_channels; )
 361     {
 362       flt = p_flt[0];
 363       sta0 = *sta_0; sta_0 += staStep1;
 364       sta1 = *sta_1; sta_1 -= staStep1;
 365       accu0 = SMULBB(        sta0, flt);
 366       accu1 = SMULBB(        sta1, flt);
 367
 368       sta0 = *sta_0; sta_0 += staStep1;
 369       sta1 = *sta_1; sta_1 -= staStep1;
 370       accu0 = SMLABT( accu0, sta0, flt);
 371       accu1 = SMLABT( accu1, sta1, flt);
 372
 373       flt = p_flt[1];
 374       sta0 = *sta_0; sta_0 += staStep1;
 375       sta1 = *sta_1; sta_1 -= staStep1;
 376       accu0 = SMLABB( accu0, sta0, flt);
 377       accu1 = SMLABB( accu1, sta1, flt);
 378
 379       sta0 = *sta_0; sta_0 += staStep1;
 380       sta1 = *sta_1; sta_1 -= staStep1;
 381       accu0 = SMLABT( accu0, sta0, flt);
 382       accu1 = SMLABT( accu1, sta1, flt);
 383
 384       flt = p_flt[2]; p_flt += pfltStep;
 385       sta0 = *sta_0; sta_0 -= staStep2;
 386       sta1 = *sta_1; sta_1 += staStep2;
 387       accu0 = SMLABB( accu0, sta0, flt);
 388       accu1 = SMLABB( accu1, sta1, flt);
 389
 390       *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 391       *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
 392     }
 393
 394     /* FIR filter 32/64 */
 395     flt = p_flt[0];
 396     sta0 = *sta_0; sta_0 += staStep1;
 397     accu0 = SMULBB(        sta0, flt);
 398     sta0 = *sta_0; sta_0 += staStep1;
 399     accu0 = SMLABT( accu0, sta0, flt);
 400
 401     flt = p_flt[1];
 402     sta0 = *sta_0; sta_0 += staStep1;
 403     accu0 = SMLABB( accu0, sta0, flt);
 404     sta0 = *sta_0; sta_0 += staStep1;
 405     accu0 = SMLABT( accu0, sta0, flt);
 406
 407     flt = p_flt[2];
 408     sta0 = *sta_0;
 409     accu0 = SMLABB( accu0, sta0, flt);
 410     *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
 411   }
 412 }
 413 #endif /* FUNCTION_qmfAnaPrototypeFirSlot */
 414 #endif /* #if defined(__CC_ARM) && defined(__ARM_ARCH_6__) */
 415
 416 #if ( defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL)
 417
 418 #define FUNCTION_qmfSynPrototypeFirSlot
 419
 420 #if defined(FUNCTION_qmfSynPrototypeFirSlot)
 421
 422 #if defined(__GNUC__)   /* cppp replaced: elif */
 423
 424 inline INT SMULWB (const LONG a, const LONG b)
 425 {
 426   INT result ;
 427   __asm__ ("smulwb %0, %1, %2"
 428     : "=r" (result)
 429     : "r" (a), "r" (b)) ;
 430
 431   return result ;
 432 }
 433 inline INT SMULWT (const LONG a, const LONG b)
 434 {
 435   INT result ;
 436   __asm__ ("smulwt %0, %1, %2"
 437     : "=r" (result)
 438     : "r" (a), "r" (b)) ;
 439
 440   return result ;
 441 }
 442
 443 inline INT SMLAWB(const LONG accu, const LONG a, const LONG b)
 444 {
 445   INT result;
 446   asm("smlawb %0, %1, %2, %3 "
 447         : "=r" (result)
 448         : "r" (a), "r" (b), "r" (accu) );
 449   return result ;
 450 }
 451
 452 inline INT SMLAWT(const LONG accu, const LONG a, const LONG b)
 453 {
 454   INT result;
 455   asm("smlawt %0, %1, %2, %3 "
 456         : "=r" (result)
 457         : "r" (a), "r" (b), "r" (accu) );
 458   return result ;
 459 }
 460
 461 #endif /* ARM compiler selector */
 462
 463
 464 static void qmfSynPrototypeFirSlot1_filter(FIXP_QMF *RESTRICT realSlot,
 465                                            FIXP_QMF *RESTRICT imagSlot,
 466                                            const FIXP_DBL *RESTRICT p_flt,
 467                                            FIXP_QSS *RESTRICT sta,
 468                                            FIXP_DBL *pMyTimeOut,
 469                                            int no_channels)
 470 {
 471   /* This code was the base for the above listed assembler sequence */
 472   /* It can be used for debugging purpose or further optimizations  */
 473   const FIXP_DBL *RESTRICT p_fltm = p_flt + 155;
 474
 475   do
 476   {
 477      FIXP_DBL result;
 478      FIXP_DBL A, B, real, imag, sta0;
 479
 480      real = *--realSlot;
 481      imag = *--imagSlot;
 482      B = p_flt[4];                        /* Bottom=[8] Top=[9]     */
 483      A = p_fltm[3];                       /* Bottom=[316] Top=[317] */
 484      sta0 = sta[0];                       /* save state[0]          */
 485      *sta++ = SMLAWT( sta[1], imag, B );  /* index=9...........319  */
 486      *sta++ = SMLAWB( sta[1], real, A );  /* index=316...........6  */
 487      *sta++ = SMLAWB( sta[1], imag, B );  /* index=8,18,    ...318  */
 488      B = p_flt[3];                        /* Bottom=[6] Top=[7]     */
 489      *sta++ = SMLAWT( sta[1], real, A );  /* index=317...........7  */
 490      A = p_fltm[4];                       /* Bottom=[318] Top=[319] */
 491      *sta++ = SMLAWT( sta[1], imag, B );  /* index=7...........317  */
 492      *sta++ = SMLAWB( sta[1], real, A );  /* index=318...........8  */
 493      *sta++ = SMLAWB( sta[1], imag, B );  /* index=6...........316  */
 494      B = p_flt[2];                        /* Bottom=[X] Top=[5]     */
 495      *sta++ = SMLAWT( sta[1], real, A );  /* index=9...........319  */
 496      A = p_fltm[2];                       /* Bottom=[X] Top=[315]   */
 497      *sta++ =         SMULWT( imag, B );  /* index=5,15, ...   315  */
 498      result = SMLAWT( sta0,   real, A );  /* index=315...........5  */
 499
 500      *pMyTimeOut++ = result;
 501
 502      real = *--realSlot;
 503      imag = *--imagSlot;
 504      A = p_fltm[0];                       /* Bottom=[310] Top=[311] */
 505      B = p_flt[7];                        /* Bottom=[14]  Top=[15]  */
 506      result = SMLAWB( sta[0], real, A );  /* index=310...........0  */
 507      *sta++ = SMLAWB( sta[1], imag, B );  /* index=14..........324  */
 508      *pMyTimeOut++ = result;
 509      B = p_flt[6];                        /* Bottom=[12]  Top=[13]  */
 510      *sta++ = SMLAWT( sta[1], real, A );  /* index=311...........1  */
 511      A = p_fltm[1];                       /* Bottom=[312] Top=[313] */
 512      *sta++ = SMLAWT( sta[1], imag, B );  /* index=13..........323  */
 513      *sta++ = SMLAWB( sta[1], real, A );  /* index=312...........2  */
 514      *sta++ = SMLAWB( sta[1], imag, B );  /* index=12..........322  */
 515      *sta++ = SMLAWT( sta[1], real, A );  /* index=313...........3  */
 516      A = p_fltm[2];                       /* Bottom=[314] Top=[315] */
 517      B = p_flt[5];                        /* Bottom=[10]  Top=[11]  */
 518      *sta++ = SMLAWT( sta[1], imag, B );  /* index=11..........321  */
 519      *sta++ = SMLAWB( sta[1], real, A );  /* index=314...........4  */
 520      *sta++ =         SMULWB( imag, B );  /* index=10..........320  */
 521
 522
 523      p_flt    += 5;
 524      p_fltm   -= 5;
 525   }
 526   while ((--no_channels) != 0);
 527
 528 }
 529
 530
 531
 532 INT qmfSynPrototypeFirSlot2(
 533                              HANDLE_QMF_FILTER_BANK qmf,
 534                              FIXP_QMF *RESTRICT realSlot,            /*!< Input: Pointer to real Slot */
 535                              FIXP_QMF *RESTRICT imagSlot,            /*!< Input: Pointer to imag Slot */
 536                              INT_PCM  *RESTRICT timeOut,             /*!< Time domain data */
 537                              INT       stride                        /*!< Time output buffer stride factor*/
 538                             )
 539 {
 540   FIXP_QSS *RESTRICT sta = (FIXP_QSS*)qmf->FilterStates;
 541   int no_channels = qmf->no_channels;
 542   int scale = ((DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor);
 543
 544   /* We map an arry of 16-bit values upon an array of 2*16-bit values to read 2 values in one shot */
 545   const FIXP_DBL *RESTRICT p_flt  = (FIXP_DBL *) qmf->p_filter;           /* low=[0],   high=[1]   */
 546   const FIXP_DBL *RESTRICT p_fltm = (FIXP_DBL *) qmf->p_filter + 155;     /* low=[310], high=[311] */
 547
 548   FDK_ASSERT(SAMPLE_BITS-1-qmf->outScalefactor >= 0); //   (DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor >= 0);
 549   FDK_ASSERT(qmf->p_stride==2 && qmf->no_channels == 32);
 550
 551   FDK_ASSERT((no_channels&3) == 0);  /* should be a multiple of 4 */
 552
 553   realSlot += no_channels-1;    // ~~"~~
 554   imagSlot += no_channels-1;    // no_channels-1 .. 0
 555
 556   FIXP_DBL MyTimeOut[32];
 557   FIXP_DBL *pMyTimeOut = &MyTimeOut[0];
 558
 559   for (no_channels = no_channels; no_channels--;)
 560   {
 561      FIXP_DBL result;
 562      FIXP_DBL A, B, real, imag;
 563
 564      real = *realSlot--;
 565      imag = *imagSlot--;
 566      A = p_fltm[0];                       /* Bottom=[310] Top=[311] */
 567      B = p_flt[7];                        /* Bottom=[14]  Top=[15]  */
 568      result = SMLAWB( sta[0], real, A );  /* index=310...........0  */
 569      *sta++ = SMLAWB( sta[1], imag, B );  /* index=14..........324  */
 570      B = p_flt[6];                        /* Bottom=[12]  Top=[13]  */
 571      *sta++ = SMLAWT( sta[1], real, A );  /* index=311...........1  */
 572      A = p_fltm[1];                       /* Bottom=[312] Top=[313] */
 573      *sta++ = SMLAWT( sta[1], imag, B );  /* index=13..........323  */
 574      *sta++ = SMLAWB( sta[1], real, A );  /* index=312...........2  */
 575      *sta++ = SMLAWB( sta[1], imag, B );  /* index=12..........322  */
 576      *sta++ = SMLAWT( sta[1], real, A );  /* index=313...........3  */
 577      A = p_fltm[2];                       /* Bottom=[314] Top=[315] */
 578      B = p_flt[5];                        /* Bottom=[10]  Top=[11]  */
 579      *sta++ = SMLAWT( sta[1], imag, B );  /* index=11..........321  */
 580      *sta++ = SMLAWB( sta[1], real, A );  /* index=314...........4  */
 581      *sta++ =         SMULWB( imag, B );  /* index=10..........320  */
 582
 583      *pMyTimeOut++ = result;
 584
 585      p_fltm   -= 5;
 586      p_flt    += 5;
 587   }
 588
 589   pMyTimeOut = &MyTimeOut[0];
 590 #if (SAMPLE_BITS == 16)
 591   const FIXP_DBL max_pos = (FIXP_DBL) 0x00007FFF << scale;
 592   const FIXP_DBL max_neg = (FIXP_DBL) 0xFFFF8001 << scale;
 593 #else
 594   scale = -scale;
 595   const FIXP_DBL max_pos = (FIXP_DBL) 0x7FFFFFFF >> scale;
 596   const FIXP_DBL max_neg = (FIXP_DBL) 0x80000001 >> scale;
 597 #endif
 598   const FIXP_DBL add_neg = (1 << scale) - 1;
 599
 600   no_channels = qmf->no_channels;
 601
 602   timeOut += no_channels*stride;
 603
 604   FDK_ASSERT(scale >= 0);
 605
 606   if (qmf->outGain != 0x80000000)
 607   {
 608     FIXP_DBL gain = qmf->outGain;
 609     for (no_channels>>=2; no_channels--;)
 610     {
 611       FIXP_DBL result1, result2;
 612
 613       result1 = *pMyTimeOut++;
 614       result2 = *pMyTimeOut++;
 615
 616       result1 = fMult(result1,gain);
 617       timeOut -= stride;
 618       if (result1 < 0)        result1 += add_neg;
 619       if (result1 < max_neg)  result1 = max_neg;
 620       if (result1 > max_pos)  result1 = max_pos;
 621 #if (SAMPLE_BITS == 16)
 622       timeOut[0] = result1 >> scale;
 623 #else
 624       timeOut[0] = result1 << scale;
 625 #endif
 626
 627       result2 = fMult(result2,gain);
 628       timeOut -= stride;
 629       if (result2 < 0)        result2 += add_neg;
 630       if (result2 < max_neg)  result2 = max_neg;
 631       if (result2 > max_pos)  result2 = max_pos;
 632 #if (SAMPLE_BITS == 16)
 633       timeOut[0] = result2 >> scale;
 634 #else
 635       timeOut[0] = result2 << scale;
 636 #endif
 637
 638       result1 = *pMyTimeOut++;
 639       result2 = *pMyTimeOut++;
 640
 641       result1 = fMult(result1,gain);
 642       timeOut -= stride;
 643       if (result1 < 0)        result1 += add_neg;
 644       if (result1 < max_neg)  result1 = max_neg;
 645       if (result1 > max_pos)  result1 = max_pos;
 646 #if (SAMPLE_BITS == 16)
 647       timeOut[0] = result1 >> scale;
 648 #else
 649       timeOut[0] = result1 << scale;
 650 #endif
 651
 652       result2 = fMult(result2,gain);
 653       timeOut -= stride;
 654       if (result2 < 0)        result2 += add_neg;
 655       if (result2 < max_neg)  result2 = max_neg;
 656       if (result2 > max_pos)  result2 = max_pos;
 657 #if (SAMPLE_BITS == 16)
 658       timeOut[0] = result2 >> scale;
 659 #else
 660       timeOut[0] = result2 << scale;
 661 #endif
 662     }
 663   }
 664   else
 665   {
 666     for (no_channels>>=2; no_channels--;)
 667     {
 668       FIXP_DBL result1, result2;
 669       result1 = *pMyTimeOut++;
 670       result2 = *pMyTimeOut++;
 671       timeOut -= stride;
 672       if (result1 < 0)        result1 += add_neg;
 673       if (result1 < max_neg)  result1 = max_neg;
 674       if (result1 > max_pos)  result1 = max_pos;
 675 #if (SAMPLE_BITS == 16)
 676       timeOut[0] = result1 >> scale;
 677 #else
 678       timeOut[0] = result1 << scale;
 679 #endif
 680
 681       timeOut -= stride;
 682       if (result2 < 0)        result2 += add_neg;
 683       if (result2 < max_neg)  result2 = max_neg;
 684       if (result2 > max_pos)  result2 = max_pos;
 685 #if (SAMPLE_BITS == 16)
 686       timeOut[0] = result2 >> scale;
 687 #else
 688       timeOut[0] = result2 << scale;
 689 #endif
 690
 691       result1 = *pMyTimeOut++;
 692       result2 = *pMyTimeOut++;
 693       timeOut -= stride;
 694       if (result1 < 0)        result1 += add_neg;
 695       if (result1 < max_neg)  result1 = max_neg;
 696       if (result1 > max_pos)  result1 = max_pos;
 697 #if (SAMPLE_BITS == 16)
 698       timeOut[0] = result1 >> scale;
 699 #else
 700       timeOut[0] = result1 << scale;
 701 #endif
 702
 703       timeOut -= stride;
 704       if (result2 < 0)        result2 += add_neg;
 705       if (result2 < max_neg)  result2 = max_neg;
 706       if (result2 > max_pos)  result2 = max_pos;
 707 #if (SAMPLE_BITS == 16)
 708       timeOut[0] = result2 >> scale;
 709 #else
 710       timeOut[0] = result2 << scale;
 711 #endif
 712     }
 713   }
 714   return 0;
 715 }
 716
 717 static
 718 void qmfSynPrototypeFirSlot_fallback( HANDLE_QMF_FILTER_BANK qmf,
 719                              FIXP_DBL *realSlot,      /*!< Input: Pointer to real Slot */
 720                              FIXP_DBL *imagSlot,      /*!< Input: Pointer to imag Slot */
 721                              INT_PCM  *timeOut,             /*!< Time domain data */
 722                              const int       stride
 723                             );
 724
 725 /*!
 726   \brief Perform Synthesis Prototype Filtering on a single slot of input data.
 727
 728   The filter takes 2 * #MAX_SYNTHESIS_CHANNELS of input data and
 729   generates #MAX_SYNTHESIS_CHANNELS time domain output samples.
 730 */
 731
 732 static
 733 void qmfSynPrototypeFirSlot( HANDLE_QMF_FILTER_BANK qmf,
 734                              FIXP_DBL *realSlot,      /*!< Input: Pointer to real Slot */
 735                              FIXP_DBL *imagSlot,      /*!< Input: Pointer to imag Slot */
 736                              INT_PCM  *timeOut,             /*!< Time domain data */
 737                              const int       stride
 738                             )
 739 {
 740     INT err = -1;
 741
 742     switch (qmf->p_stride) {
 743     case 2:
 744       err = qmfSynPrototypeFirSlot2(qmf, realSlot, imagSlot, timeOut, stride);
 745       break;
 746     default:
 747       err = -1;
 748     }
 749
 750     /* fallback if configuration not available or failed */
 751     if(err!=0) {
 752         qmfSynPrototypeFirSlot_fallback(qmf, realSlot, imagSlot, timeOut, stride);
 753     }
 754 }
 755 #endif /* FUNCTION_qmfSynPrototypeFirSlot */
 756
 757 #endif  /*  ( defined(__CC_ARM) && defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL) */
 758
 759
 760
 761 /* #####################################################################################*/
 762
 763
 764
 765 #endif  /* (QMF_NO_POLY==5) */
 766