rtphone/src/libs/libevs/lib_com/fft_fx.cpp

2833 lines
80 KiB
C++
Executable File

/*====================================================================================
EVS Codec 3GPP TS26.442 Apr 03, 2018. Version 12.11.0 / 13.6.0 / 14.2.0
====================================================================================*/
#include "options.h" /* Compilation switches */
#include "cnst_fx.h" /* Common constants */
#include "prot_fx.h" /* Function prototypes */
#include "rom_com_fx.h" /* Static table prototypes */
#include "stl.h"
#include <assert.h>
/*-----------------------------------------------------------------*
* Local constants and functions (Word16 data path)
*-----------------------------------------------------------------*/
#define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
/* 5-point / 32-point / 64-point stages used by DoRTFT160_16fx() and DoRTFT320_16fx() */
static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
/* Complex FFT kernel on Word16 data with a Word32 cos/sin table */
static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
static void cft1st_16fx(Word16 n, Word16 *a, const Word32 *w);
static void cftmdl_16fx(Word16 n, Word16 l, Word16 *a, const Word32 *w);
#include "math_32.h"
/*-----------------------------------------------------------------*
* Local functions (Word32 data path)
* Complex FFT kernel on Word32 data with a Word16 cos/sin table;
* cdftForw_fx() is called by DoRTFTn_fx(), while edct2_fx() calls
* bitrv2_SR_fx() and cftfsub_fx() directly.
*-----------------------------------------------------------------*/
static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
/*-----------------------------------------------------------------*
* DoRTFTn_fx()
* In-place complex FFT of n points on split real/imaginary arrays.
* The data are interleaved into a local buffer, transformed with
* cdftForw_fx(), and written back with bins 1..n-1 in reversed order.
*-----------------------------------------------------------------*/
void DoRTFTn_fx(
    Word32 *x,      /* i/o : real part of input and output data */
    Word32 *y,      /* i/o : imaginary part of input and output data */
    const Word16 n  /* i   : size of the FFT; must be 16, 32, 64, 128, 256 or 512 */
)
{
    Word16 i;
    Word32 z[2048], *pt;
    const Word16 *ip;   /* bit-reversal work table for the selected size */
    const Word16 *w;    /* cos/sin table for the selected size */

    /* Pick the tables BEFORE touching z[]: if asserts are compiled out, an
       unsupported n must not be allowed to drive the copy loop below past
       the end of the fixed-size buffer. */
    IF (sub(n, 16) == 0)
    {
        ip = Ip_fft16_fx;
        w = w_fft16_fx;
    }
    ELSE IF (sub(n, 32) == 0)
    {
        ip = Ip_fft32_fx;
        w = w_fft32_fx;
    }
    ELSE IF (sub(n, 64) == 0)
    {
        ip = Ip_fft64_fx;
        w = w_fft64_fx;
    }
    ELSE IF (sub(n, 128) == 0)
    {
        ip = Ip_fft128_fx;
        w = w_fft128_fx;
    }
    ELSE IF (sub(n, 256) == 0)
    {
        ip = Ip_fft256_fx;
        w = w_fft256_fx;
    }
    ELSE IF (sub(n, 512) == 0)
    {
        ip = Ip_fft512_fx;
        w = w_fft512_fx;
    }
    ELSE
    {
        /* Unsupported size: abort in debug builds, otherwise leave x/y untouched */
        assert(0);
        return;
    }

    /* Interleave: z = { x[0], y[0], x[1], y[1], ... } */
    pt = z;
    FOR ( i=0; i<n; i++ )
    {
        *pt++ = x[i];
        move16();
        *pt++ = y[i];
        move16();
    }

    cdftForw_fx(2*n, z, ip, w);

    /* De-interleave: bin 0 stays in place, the remaining bins are stored
       from index n-1 down to 1 */
    x[0]=z[0];
    move16();
    y[0]=z[1];
    move16();
    pt = &z[2];
    FOR( i=n-1; i>=1 ; i--)
    {
        x[i]=*pt++;
        move16();
        y[i]=*pt++;
        move16();
    }
    return;
}
/*-----------------------------------------------------------------*
* cdftForw_fx()
* Main function of the Complex Discrete Fourier Transform (Word32 data):
* reorders a[] in place with bitrv2_SR_fx(), then runs the FFT stages
* with cftfsub_fx().
*-----------------------------------------------------------------*/
static void cdftForw_fx(
Word16 n, /* i : data length of real and imag (DoRTFTn_fx passes 2 * FFT size, data interleaved) */
Word32 *a, /* i/o : input/output data Q(q)*/
const Word16 *ip, /* i : work area for bit reversal */
const Word16 *w /* i : cos/sin table Q14*/
)
{
/* bit reversal */
/* the first two entries of ip are skipped here; edct2_fx() reads ip[0] and ip[1] as table sizes */
bitrv2_SR_fx(n, ip + 2, a);
/* Do FFT */
cftfsub_fx(n, a, w);
}
/*-----------------------------------------------------------------*
* swap_cplx_fx()
* Exchange the two-entry (real, imag) pairs starting at a[j1] and a[k1]
*-----------------------------------------------------------------*/
static void swap_cplx_fx(
    Word32 *a,        /* i/o : interleaved complex data */
    const Word16 j1,  /* i   : index of the first pair */
    const Word16 k1   /* i   : index of the second pair */
)
{
    Word32 xr, xi, yr, yi;

    xr = L_add(0,a[j1]);
    xi = L_add(0,a[j1 + 1]);
    yr = L_add(0,a[k1]);
    yi = L_add(0,a[k1 + 1]);
    a[j1] = yr;
    move32();
    a[j1 + 1] = yi;
    move32();
    a[k1] = xr;
    move32();
    a[k1 + 1] = xi;
    move32();
}
/*-----------------------------------------------------------------*
* bitrv2_SR_fx()
* Bit reversal: in-place reordering of the interleaved complex data
* a[] driven by the index table ip[].
*-----------------------------------------------------------------*/
static void bitrv2_SR_fx(
    Word16 n,          /* i   : data length of real and imag */
    const Word16 *ip,  /* i   : work area for bit reversal (read only) */
    Word32 *a          /* i/o : input/output data Q(q)*/
)
{
    Word16 j, j1, k, k1, m, m2;
    Word16 l;

    /* Halve l and double m until 8*m >= l */
    l = n;
    move16();
    m = 1;
    move16();
    WHILE (shl(m, 3) < l)
    {
        l = shr(l, 1);
        m = shl(m, 1);
    }
    m2 = shl(m, 1);

    IF (shl(m, 3) == l)
    {
        FOR (k = 0; k < m; k++)
        {
            FOR (j = 0; j < k; j++)
            {
                /* four swaps per (j,k) pair, offsets stepping by m2 / 2*m2 */
                j1 = add(shl(j, 1), ip[k]);
                k1 = add(shl(k, 1), ip[j]);
                swap_cplx_fx(a, j1, k1);

                j1 = add(j1, m2);
                k1 = add(k1, shl(m2, 1));
                swap_cplx_fx(a, j1, k1);

                j1 = add(j1, m2);
                k1 = sub(k1, m2);
                swap_cplx_fx(a, j1, k1);

                j1 = add(j1, m2);
                k1 = add(k1, shl(m2, 1));
                swap_cplx_fx(a, j1, k1);
            }
            /* extra swap for each k, including k == 0 */
            j1 = add(add(shl(k, 1), m2), ip[k]);
            k1 = add(j1, m2);
            swap_cplx_fx(a, j1, k1);
        }
    }
    ELSE
    {
        FOR (k = 1; k < m; k++)
        {
            FOR (j = 0; j < k; j++)
            {
                /* two swaps per (j,k) pair, both offsets stepping by m2 */
                j1 = add(shl(j, 1), ip[k]);
                k1 = add(shl(k, 1), ip[j]);
                swap_cplx_fx(a, j1, k1);

                j1 = add(j1, m2);
                k1 = add(k1, m2);
                swap_cplx_fx(a, j1, k1);
            }
        }
    }
    return;
}
/*-----------------------------------------------------------------*
* cftfsub_fx()
* Complex Discrete Fourier Transform (forward) on Word32 data:
* runs cft1st_fx() / cftmdl_fx() for n > 8, then one final
* multiplier-free pass (4-point or 2-point butterflies).
*-----------------------------------------------------------------*/
static void cftfsub_fx(
    Word16 n,        /* i   : data length of real and imag */
    Word32 *a,       /* i/o : input/output data Q(q)*/
    const Word16 *w  /* i   : cos/sin table Q14*/
)
{
    Word16 p0, p1, p2, p3, span;
    Word32 s01r, s01i, d01r, d01i;
    Word32 s23r, s23i, d23r, d23i;

    span = 2;
    move16();
    IF (n > 8)
    {
        cft1st_fx(n, a, w);
        span = 8;
        move16();
        WHILE ((shl(span, 2) < n))
        {
            cftmdl_fx(n, span, a, w);
            span = shl(span, 2);
        }
    }

    IF (shl(span, 2) == n)
    {
        /* final pass: 4-point butterflies over entries span apart */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0, span);
            p2 = add(p1, span);
            p3 = add(p2, span);

            s01r = L_add(a[p0], a[p1]);
            s01i = L_add(a[p0 + 1], a[p1 + 1]);
            d01r = L_sub(a[p0], a[p1]);
            d01i = L_sub(a[p0 + 1], a[p1 + 1]);
            s23r = L_add(a[p2], a[p3]);
            s23i = L_add(a[p2 + 1], a[p3 + 1]);
            d23r = L_sub(a[p2], a[p3]);
            d23i = L_sub(a[p2 + 1], a[p3 + 1]);

            a[p0] = L_add(s01r, s23r);
            move32();
            a[p2] = L_sub(s01r, s23r);
            move32();
            a[p0 + 1] = L_add(s01i, s23i);
            move32();
            a[p2 + 1] = L_sub(s01i, s23i);
            move32();
            a[p1] = L_sub(d01r, d23i);
            move32();
            a[p1 + 1] = L_add(d01i, d23r);
            move32();
            a[p3] = L_add(d01r, d23i);
            move32();
            a[p3 + 1] = L_sub(d01i, d23r);
            move32();
        }
    }
    ELSE
    {
        /* final pass: 2-point butterflies */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0, span);
            d01r = L_sub(a[p0], a[p1]);
            d01i = L_sub(a[p0 + 1], a[p1 + 1]);
            a[p0] = L_add(a[p0], a[p1]);
            move32();
            a[p0 + 1] = L_add(a[p0 + 1], a[p1 + 1]);
            move32();
            a[p1] = d01r;
            move32();
            move32();
            a[p1 + 1] = d01i;
            move32();
            move32();
        }
    }
    return;
}
/*-----------------------------------------------------------------*
* cft1st_fx()
* Subfunction of Complex Discrete Fourier Transform (Word32 data).
* Processes a[] in groups of 16 entries (8 complex values): two
* 4-point butterflies per group, with cos/sin factors read from w[].
* Entries 0..15 are handled before the loop with simplified factors.
*-----------------------------------------------------------------*/
static void cft1st_fx(
Word16 n, /* i : data length of real and imag */
Word32 *a, /* i/o : input/output data Q(q)*/
const Word16 *w /* i : cos/sin table Q14*/
)
{
Word16 j, k1, k2;
Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
/* entries 0..7: butterfly without any multiplication */
x0r = L_add(a[0], a[2]);
x0i = L_add(a[1], a[3]);
x1r = L_sub(a[0], a[2]);
x1i = L_sub(a[1], a[3]);
x2r = L_add(a[4], a[6]);
x2i = L_add(a[5], a[7]);
x3r = L_sub(a[4], a[6]);
x3i = L_sub(a[5], a[7]);
a[0] = L_add(x0r, x2r);
move32();
a[1] = L_add(x0i, x2i);
move32();
a[4] = L_sub(x0r, x2r);
move32();
a[5] = L_sub(x0i, x2i);
move32();
a[2] = L_sub(x1r, x3i);
move32();
a[3] = L_add(x1i, x3r);
move32();
a[6] = L_add(x1r, x3i);
move32();
a[7] = L_sub(x1i, x3r);
move32();
/* entries 8..15: only the single factor w[2] is needed */
wk1r = w[2];
move16();
x0r = L_add(a[8], a[10]);
x0i = L_add(a[9], a[11]);
x1r = L_sub(a[8], a[10]);
x1i = L_sub(a[9], a[11]);
x2r = L_add(a[12], a[14]);
x2i = L_add(a[13], a[15]);
x3r = L_sub(a[12], a[14]);
x3i = L_sub(a[13], a[15]);
a[8] = L_add(x0r, x2r);
move32();
a[9] = L_add(x0i, x2i);
move32();
/* (x0 - x2) with real/imag swapped and the new real part negated */
a[12] = L_sub(x2i, x0i);
move32();
a[13] = L_sub(x0r, x2r);
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i ,x3r);
/* NOTE(review): the L_shl(.,1) before Mult_32_16 presumably compensates for the Q14 table - confirm */
a[10] = Mult_32_16(L_shl(L_sub(x0r, x0i), 1), wk1r);
move32();
a[11] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
move32();
x0r = L_add(x3i, x1r);
x0i = L_sub(x3r,x1i);
a[14] = Mult_32_16(L_shl(L_sub(x0i, x0r), 1), wk1r);
move32();
a[15] = Mult_32_16(L_shl(L_add(x0i, x0r), 1), wk1r);
move32();
/* remaining groups of 16 entries; k1 steps through w[] by 2 per group */
k1 = 0;
move16();
FOR (j = 16; j < n; j += 16)
{
k1 = add(k1, 2);
k2 = shl(k1, 1);
wk2r = w[k1];
move16();
wk2i = w[k1 + 1];
move16();
wk1r = w[k2];
move16();
wk1i = w[k2 + 1];
move16();
/* third factor is derived from the first two instead of being read from the table */
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2i, wk1i), 14)));
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2i, wk1r), 14), wk1i, 1));
/* first half of the group: entries j .. j+7 */
x0r = L_add(a[j], a[j + 2]);
x0i = L_add(a[j + 1], a[j + 3]);
x1r = L_sub(a[j], a[j + 2]);
x1i = L_sub(a[j + 1], a[j + 3]);
x2r = L_add(a[j + 4], a[j + 6]);
x2i = L_add(a[j + 5], a[j + 7]);
x3r = L_sub(a[j + 4], a[j + 6]);
x3i = L_sub(a[j + 5], a[j + 7]);
a[j] = L_add(x0r, x2r);
move32();
a[j + 1] = L_add(x0i, x2i);
move32();
x0r = L_sub(x0r, x2r);
x0i = L_sub(x0i, x2i);
/* complex multiply of (x0r, x0i) by (wk2r, wk2i) */
a[j + 4] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
move32();
a[j + 5] = L_add(Mult_32_16(L_shl(x0i, 1), wk2r), Mult_32_16(L_shl(x0r, 1), wk2i));
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i, x3r);
/* complex multiply by (wk1r, wk1i) */
a[j + 2] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
move32();
a[j + 3] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
move32();
x0r = L_add(x1r, x3i);
x0i = L_sub(x1i, x3r);
/* complex multiply by (wk3r, wk3i) */
a[j + 6] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
move32();
a[j + 7] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
move32();
/* second half of the group: entries j+8 .. j+15, using the next table pair */
wk1r = w[k2 + 2];
move16();
wk1i = w[k2 + 3];
move16();
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2r, wk1i), 14)));
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2r, wk1r), 14), wk1i, 1));
x0r = L_add(a[j + 8], a[j + 10]);
x0i = L_add(a[j + 9], a[j + 11]);
x1r = L_sub(a[j + 8], a[j + 10]);
x1i = L_sub(a[j + 9], a[j + 11]);
x2r = L_add(a[j + 12], a[j + 14]);
x2i = L_add(a[j + 13], a[j + 15]);
x3r = L_sub(a[j + 12], a[j + 14]);
x3i = L_sub(a[j + 13], a[j + 15]);
a[j + 8] = L_add(x0r, x2r);
move32();
a[j + 9] = L_add(x0i, x2i);
move32();
x0r = L_sub(x0r, x2r);
x0i = L_sub(x0i, x2i);
/* here wk2r / wk2i are applied with swapped roles compared with the first half */
a[j + 12] = L_negate(L_add(Mult_32_16(L_shl(x0r, 1), wk2i), Mult_32_16(L_shl(x0i, 1), wk2r)));
move32();
a[j + 13] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i, x3r);
a[j + 10] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
move32();
a[j + 11] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
move32();
x0r =L_add(x1r, x3i);
x0i =L_sub(x1i, x3r);
a[j + 14] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
move32();
a[j + 15] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
move32();
}
return;
}
/*-----------------------------------------------------------------*
* cftmdl_fx()
* Subfunction of Complex Discrete Fourier Transform (Word32 data).
* One middle pass: 4-point butterflies over entries spaced l apart,
* in groups of m = 4*l entries. The first group needs no
* multiplication, the second uses only w[2], and the remaining
* groups are processed in pairs with factors read from w[].
*-----------------------------------------------------------------*/
static void cftmdl_fx(
Word16 n, /* i : data length of real and imag */
Word16 l, /* i : initial shift for processing */
Word32 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
const Word16 *w /* i : cos/sin table (NOTE(review): was documented as Q30, but the table is Word16 and the other 32-bit kernels state Q14 - confirm) */
)
{
Word16 j, j1, j2, j3, k, k1, k2, m, m2;
Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
Word16 tmp;
m = shl(l, 2);
/* group starting at 0: butterflies without multiplication */
FOR (j = 0; j < l; j += 2)
{
j1 = add(j, l);
j2 = add(j1, l);
j3 = add(j2, l);
x0r = L_add(a[j], a[j1]);
x0i = L_add(a[j + 1], a[j1 + 1]);
x1r = L_sub(a[j], a[j1]);
x1i = L_sub(a[j + 1], a[j1 + 1]);
x2r = L_add(a[j2], a[j3]);
x2i = L_add(a[j2 + 1], a[j3 + 1]);
x3r = L_sub(a[j2], a[j3]);
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
a[j] = L_add(x0r, x2r);
move32();
a[j + 1] = L_add(x0i, x2i);
move32();
a[j2] = L_sub(x0r, x2r);
move32();
a[j2 + 1] =L_sub(x0i, x2i);
move32();
a[j1] = L_sub(x1r, x3i);
move32();
a[j1 + 1] = L_add(x1i, x3r);
move32();
a[j3] = L_add(x1r, x3i);
move32();
a[j3 + 1] = L_sub(x1i, x3r);
move32();
}
/* group starting at m: only the single factor w[2] is needed */
wk1r = w[2];
move16();
tmp = add(l, m);
FOR (j = m; j < tmp; j += 2)
{
j1 = add(j, l);
j2 = add(j1, l);
j3 = add(j2, l);
x0r = L_add(a[j], a[j1]);
x0i = L_add(a[j + 1], a[j1 + 1]);
x1r = L_sub(a[j], a[j1]);
x1i = L_sub(a[j + 1], a[j1 + 1]);
x2r = L_add(a[j2], a[j3]);
x2i = L_add(a[j2 + 1], a[j3 + 1]);
x3r = L_sub(a[j2], a[j3]);
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
a[j] = L_add(x0r, x2r);
move32();
a[j + 1] = L_add(x0i, x2i);
move32();
/* (x0 - x2) with real/imag swapped and the new real part negated */
a[j2] = L_sub(x2i, x0i);
move32();
a[j2 + 1] = L_sub(x0r, x2r);
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i, x3r);
/* NOTE(review): the L_shl(.,1) before Mult_32_16 presumably compensates for the table scaling - confirm */
a[j1] = Mult_32_16(L_shl(L_sub(x0r, x0i), 1), wk1r);
move32();
a[j1 + 1] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
move32();
x0r = L_add(x3i, x1r);
x0i = L_sub(x3r, x1i);
a[j3] = Mult_32_16(L_shl(L_sub(x0i, x0r), 1), wk1r);
move32();
a[j3 + 1] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
move32();
}
/* remaining groups, two per iteration (at k and at k + m); k1 steps through w[] by 2 */
k1 = 0;
move16();
m2 = shl(m, 1);
FOR (k = m2; k < n; k += m2)
{
k1 = add(k1, 2);
k2 = shl(k1, 1);
wk2r = w[k1];
move16();
wk2i = w[k1 + 1];
move16();
wk1r = w[k2];
move16();
wk1i = w[k2 + 1];
move16();
/* third factor is derived from the first two instead of being read from the table */
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2i, wk1i), 14)));
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2i, wk1r), 14), wk1i, 1));
tmp = add(l, k) ;
/* group starting at k */
FOR (j = k; j < tmp; j += 2)
{
j1 = add(j, l);
j2 = add(j1, l);
j3 = add(j2, l);
x0r = L_add(a[j], a[j1]);
x0i = L_add(a[j + 1], a[j1 + 1]);
x1r = L_sub(a[j], a[j1]);
x1i = L_sub(a[j + 1], a[j1 + 1]);
x2r = L_add(a[j2], a[j3]);
x2i = L_add(a[j2 + 1], a[j3 + 1]);
x3r = L_sub(a[j2], a[j3]);
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
a[j] = L_add(x0r, x2r);
move32();
a[j + 1] = L_add(x0i, x2i);
move32();
x0r = L_sub(x0r, x2r);
x0i = L_sub(x0i, x2i);
/* complex multiply by (wk2r, wk2i) */
a[j2] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
move32();
a[j2 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk2r), Mult_32_16(L_shl(x0r, 1), wk2i));
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i, x3r);
/* complex multiply by (wk1r, wk1i) */
a[j1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
move32();
a[j1 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
move32();
x0r = L_add(x1r, x3i);
x0i = L_sub(x1i, x3r);
/* complex multiply by (wk3r, wk3i) */
a[j3] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
move32();
a[j3 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
move32();
}
/* group starting at k + m, using the next table pair */
wk1r = w[k2 + 2];
move16();
wk1i = w[k2 + 3];
move16();
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2r, wk1i), 14)));
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2r, wk1r), 14), wk1i, 1));
tmp = add(l, add(k, m));
FOR (j = add(k, m); j < tmp; j += 2)
{
j1 = add(j, l);
j2 = add(j1, l);
j3 = add(j2, l);
x0r = L_add(a[j], a[j1]);
x0i = L_add(a[j + 1], a[j1 + 1]);
x1r = L_sub(a[j], a[j1]);
x1i = L_sub(a[j + 1], a[j1 + 1]);
x2r = L_add(a[j2], a[j3]);
x2i = L_add(a[j2 + 1], a[j3 + 1]);
x3r = L_sub(a[j2], a[j3]);
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
a[j] = L_add(x0r, x2r);
move32();
a[j + 1] = L_add(x0i, x2i);
move32();
x0r= L_sub(x0r, x2r);
x0i=L_sub(x0i, x2i);
/* here wk2r / wk2i are applied with swapped roles compared with the group at k */
a[j2] = L_negate(L_add(Mult_32_16(L_shl(x0r, 1), wk2i), Mult_32_16(L_shl(x0i, 1), wk2r)));
move32();
a[j2 + 1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
move32();
x0r = L_sub(x1r, x3i);
x0i = L_add(x1i, x3r);
a[j1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
move32();
a[j1 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
move32();
x0r = L_add(x1r, x3i);
x0i = L_sub(x1i, x3r);
a[j3] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
move32();
a[j3 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
move32();
}
}
return;
}
/*-----------------------------------------------------------------*
* cftbsub_fx()
* Backward counterpart of cftfsub_fx(): same cft1st_fx() /
* cftmdl_fx() passes, but the final pass negates the imaginary
* parts of the first two inputs of every butterfly.
*-----------------------------------------------------------------*/
static void cftbsub_fx(
    Word16 n,
    Word32 *a,
    const Word16 *w /* i : cos/sin table */
)
{
    Word16 p0, p1, p2, p3, span;
    Word32 s01r, s01i, d01r, d01i;
    Word32 s23r, s23i, d23r, d23i;

    span = 2;
    move16();
    IF (n > 8)
    {
        cft1st_fx(n, a, w);
        span = 8;
        move16();
        WHILE (sub(shl(span, 2), n) < 0)
        {
            cftmdl_fx(n, span, a, w);
            span = shl(span, 2);
        }
    }

    IF (sub(shl(span, 2), n) == 0)
    {
        /* final pass: 4-point butterflies */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0, span);
            p2 = add(p1, span);
            p3 = add(p2, span);

            s01r = L_add(a[p0], a[p1]);
            s01i = L_negate(L_add(a[p0 + 1], a[p1 + 1]));
            d01r = L_sub(a[p0], a[p1]);
            d01i = L_sub(a[p1 + 1], a[p0 + 1]);
            s23r = L_add(a[p2], a[p3]);
            s23i = L_add(a[p2 + 1], a[p3 + 1]);
            d23r = L_sub(a[p2], a[p3]);
            d23i = L_sub(a[p2 + 1], a[p3 + 1]);

            a[p0] = L_add(s01r, s23r);
            move32();
            a[p0 + 1] = L_sub(s01i, s23i);
            move32();
            a[p2] = L_sub(s01r, s23r);
            move32();
            a[p2 + 1] = L_add(s01i, s23i);
            move32();
            a[p1] = L_sub(d01r, d23i);
            move32();
            a[p1 + 1] = L_sub(d01i, d23r);
            move32();
            a[p3] = L_add(d01r, d23i);
            move32();
            a[p3 + 1] = L_add(d01i, d23r);
            move32();
        }
    }
    ELSE
    {
        /* final pass: 2-point butterflies */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0, span);
            d01r = L_sub(a[p0], a[p1]);
            d01i = L_sub(a[p1 + 1], a[p0 + 1]);
            a[p0] = L_add(a[p0], a[p1]);
            move32();
            a[p0 + 1] = L_negate(L_add(a[p0 + 1], a[p1 + 1]));
            move32();
            a[p1] = d01r;
            move32();
            move32();
            a[p1 + 1] = d01i;
            move32();
            move32();
        }
    }
}
/*-----------------------------------------------------------------*
* rftfsub_fx()
* Forward post-processing pass: combines the mirrored pairs
* (a[j], a[j+1]) and (a[n-j], a[n-j+1]) using factors from c[].
*-----------------------------------------------------------------*/
static void rftfsub_fx(
    Word16 n,
    Word32 *a,
    Word16 nc,
    const Word16 *c
)
{
    Word16 lo, hi, tabPos, tabStep, half, rem;
    Word32 dRe, sIm, rotRe, rotIm;
    Word16 fRe, fIm;

    half = shr(n, 1);

    /* tabStep = 2 * nc / half, obtained by repeated subtraction */
    rem = shl(nc, 1);
    tabStep = 0;
    move16();
    WHILE (sub(rem, half) >= 0)
    {
        tabStep = add(tabStep, 1);
        rem = sub(rem, half);
    }

    tabPos = 0;
    move16();
    FOR (lo = 2; lo < half; lo += 2)
    {
        hi = sub(n, lo);
        tabPos = add(tabPos, tabStep);

        fRe = sub(8192, c[nc - tabPos]);
        fIm = c[tabPos];
        move16();

        dRe = L_sub(a[lo], a[hi]);
        sIm = L_add(a[lo + 1], a[hi + 1]);
        rotRe = L_sub(Mult_32_16(L_shl(dRe, 1), fRe), Mult_32_16(L_shl(sIm, 1), fIm));
        rotIm = L_add(Mult_32_16(L_shl(sIm, 1), fRe), Mult_32_16(L_shl(dRe, 1), fIm));

        a[lo] = L_sub(a[lo], rotRe);
        move32();
        a[lo + 1] = L_sub(a[lo + 1], rotIm);
        move32();
        a[hi] = L_add(a[hi], rotRe);
        move32();
        a[hi + 1] = L_sub(a[hi + 1], rotIm);
        move32();
    }
}
/*-----------------------------------------------------------------*
* rftbsub_fx()
* Backward counterpart of rftfsub_fx(): negates a[1] and a[n/2+1]
* and combines the mirrored pairs with the opposite rotation sign.
*-----------------------------------------------------------------*/
static void rftbsub_fx(
    Word16 n,
    Word32 *a,
    Word16 nc,
    const Word16 *c
)
{
    Word16 lo, hi, tabPos, tabStep, half, rem;
    Word32 dRe, sIm, rotRe, rotIm;
    Word16 fRe, fIm;

    a[1] = L_negate(a[1]);
    half = shr(n, 1);

    /* tabStep = 2 * nc / half, obtained by repeated subtraction */
    rem = shl(nc, 1);
    tabStep = 0;
    move16();
    WHILE (sub(rem, half) >= 0)
    {
        tabStep = add(tabStep, 1);
        rem = sub(rem, half);
    }

    tabPos = 0;
    move16();
    FOR (lo = 2; lo < half; lo += 2)
    {
        hi = sub(n, lo);
        tabPos = add(tabPos, tabStep);

        fRe = sub(8192, c[nc - tabPos]);
        fIm = c[tabPos];
        move16();

        dRe = L_sub(a[lo], a[hi]);
        sIm = L_add(a[lo + 1], a[hi + 1]);
        rotRe = L_add(Mult_32_16(L_shl(dRe, 1), fRe), Mult_32_16(L_shl(sIm, 1), fIm));
        rotIm = L_sub(Mult_32_16(L_shl(sIm, 1), fRe), Mult_32_16(L_shl(dRe, 1), fIm));

        a[lo] = L_sub(a[lo], rotRe);
        move32();
        a[lo + 1] = L_sub(rotIm, a[lo + 1]);
        move32();
        a[hi] = L_add(a[hi], rotRe);
        move32();
        a[hi + 1] = L_sub(rotIm, a[hi + 1]);
        move32();
    }

    a[half + 1] = L_negate(a[half + 1]);
    move32();
}
/*-----------------------------------------------------------------*
* dctsub_fx()
* Rotates every mirrored pair (a[j], a[n-j]) for j = 1..n/2-1 with
* the sum/difference of two entries of c[], and scales the middle
* element a[n/2] by c[0].
*-----------------------------------------------------------------*/
static void dctsub_fx(
    Word16 n,
    Word32 *a,
    Word16 nc,
    const Word16 *c
)
{
    Word16 lo, hi, tabPos, tabStep, half, rem;
    Word16 fDiff, fSum;
    Word32 mirrored;

    half = shr(n, 1);

    /* tabStep = nc / n, obtained by repeated subtraction */
    rem = nc;
    move16();
    tabStep = 0;
    move16();
    WHILE (sub(rem, n) >= 0)
    {
        tabStep = add(tabStep, 1);
        rem = sub(rem, n);
    }

    tabPos = 0;
    move16();
    FOR (lo = 1; lo < half; lo++)
    {
        hi = sub(n, lo);
        tabPos = add(tabPos, tabStep);

        fDiff = sub(c[tabPos], c[nc - tabPos]);
        fSum = add(c[tabPos], c[nc - tabPos]);

        /* new a[hi] must be computed from the old a[lo], so park it first */
        mirrored = L_sub(Mult_32_16(L_shl(a[lo], 1), fSum), Mult_32_16(L_shl(a[hi], 1), fDiff));
        a[lo] = L_add(Mult_32_16(L_shl(a[lo], 1), fDiff), Mult_32_16(L_shl(a[hi], 1), fSum));
        move32();
        a[hi] = mirrored;
        move32();
    }

    a[half] = Mult_32_16(L_shl(a[half], 1), c[0]);
}
/*-----------------------------------------------------------------*
* edct2_fx()
*
* Transformation of the signal to DCT domain
* OR Inverse EDCT-II for short frames
*
* The Word16 input is first scaled up into the Word32 work/output
* array a[]; the shift that was applied is returned through *q.
* isgn < 0 runs the backward chain (rftbsub_fx, bitrv2_SR_fx,
* cftbsub_fx, then dctsub_fx); isgn >= 0 runs dctsub_fx followed by
* the forward chain (bitrv2_SR_fx, cftfsub_fx, rftfsub_fx) and a
* final right shift by 5 of every output.
*-----------------------------------------------------------------*/
void edct2_fx(
Word16 n, /* i : transform length (number of entries in in[] and a[]) */
Word16 isgn, /* i : direction selector: < 0 or >= 0 (see above) */
Word16 *in, /* i : input samples */
Word32 *a, /* o : transformed data */
Word16 *q, /* o : left shift applied to the input (Exp16Array(n, in) + 6) */
const Word16 *ip, /* i : ip[0], ip[1] give table sizes; ip + 2 is the bit-reversal table */
const Word16 *w /* i : cos/sin table; w + nw is passed to the rft/dct sub-functions */
)
{
Word16 j, nw, nc;
Word32 xr;
/* NOTE(review): Exp16Array presumably returns the common headroom of in[] - confirm */
*q = Exp16Array(n, in);
*q = add(*q, 6);
FOR (j = 0; j < n; j++)
{
a[j] = L_shl((Word32) in[j], *q);
move32();
}
/* nw = max(ip[0], n / 4) whenever n exceeds 4 * ip[0] */
nw = ip[0];
move16();
if (sub(n, shl(nw, 2)) > 0)
{
nw = shr(n, 2);
}
/* nc = max(ip[1], n) */
nc = ip[1];
move16();
if (n > nc)
{
nc = n;
move16();
}
IF (isgn < 0)
{
/* pre-processing: replace adjacent entries by their difference / sum, walking downwards */
xr = a[n - 1];
move32();
FOR (j = n - 2; j >= 2; j -= 2)
{
a[j + 1] = L_sub(a[j], a[j - 1]);
move32();
a[j] = L_add(a[j], a[j - 1]);
move32();
}
a[1] = L_sub(a[0], xr);
move32();
a[0] = L_add(a[0], xr);
move32();
IF (n > 4)
{
rftbsub_fx(n, a, nc, w + nw);
bitrv2_SR_fx(n, ip + 2, a);
cftbsub_fx(n, a, w);
}
ELSE IF (n == 4)
{
cftfsub_fx(n, a, w);
}
}
IF (isgn >= 0)
{
/* halve the first entry before the rotation step */
a[0] = L_shr(a[0], 1);
move32();
}
/* executed for both directions */
dctsub_fx(n, a, nc, w + nw);
IF (isgn >= 0)
{
IF (n > 4)
{
bitrv2_SR_fx(n, ip + 2, a);
cftfsub_fx(n, a, w);
rftfsub_fx(n, a, nc, w + nw);
}
ELSE IF (n == 4)
{
cftfsub_fx(n, a, w);
}
/* post-processing: difference / sum of adjacent entries, walking upwards */
xr = L_sub(a[0], a[1]);
a[0] = L_add(a[0], a[1]);
move32();
FOR (j = 2; j < n; j += 2)
{
a[j - 1] = L_sub(a[j], a[j + 1]);
move32();
a[j] = L_add(a[j], a[j + 1]);
move32();
}
a[n - 1] = xr;
move32();
/* scale the result down by 5 bits (this path only) */
FOR (j = 0; j < n; j ++)
{
a[j] = L_shr(a[j], 5);
move32();
}
}
}
/*-----------------------------------------------------------------*
* fft5_shift4()
* 5-point FFT with 4-point circular shift
*
* Operates in place on the five complex values addressed through
* Idx[0], Idx[n1], Idx[2*n1], Idx[3*n1] and Idx[4*n1].
*-----------------------------------------------------------------*/
static void fft5_shift4_16fx(
Word16 n1, /* i : length of data (stride between the five entries of Idx) */
Word16 *zRe, /* i/o : real part of input and output data */
Word16 *zIm, /* i/o : imaginary part of input and output data */
const Word16 *Idx /* i : pointer of the address table */
)
{
Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn,T2, T3, T4, T5, T6, T7;
Word16 i0,i1,i2,i3,i4;
Word32 L_tmp;
/* fetch the five data positions from the address table */
i0 = Idx[0];
move16();
i1 = Idx[n1];
move16();
i2 = Idx[n1*2];
move16();
i3 = Idx[n1*3];
move16();
i4 = Idx[n1*4];
move16();
/* real parts: pair sums/differences of (i1,i4) and (i2,i3) */
T1 = zRe[i0];
move16();
To = zIm[i0];
move16();
T2 = zRe[i1];
move16();
T3 = zRe[i4];
move16();
T4 = add(T2,T3);
T5 = zRe[i2];
move16();
T6 = zRe[i3];
move16();
T7 = add(T5,T6);
T8 = add(T4,T7);
Tt = sub(T5,T6);
/* T9 = KP559016994 * (T4 - T7); */
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
T9 = round_fx(L_tmp);
Ts = sub(T2,T3);
/* imaginary parts: same combinations (T2..T7 are reused as scratch) */
T2 = zIm[i1];
move16();
T3 = zIm[i4];
move16();
T4 = add(T2,T3);
T5 = zIm[i2];
move16();
T6 = zIm[i3];
move16();
T7 = add(T5,T6);
Te = sub(T2,T3);
Tp = add(T4,T7);
Th = sub(T5,T6);
/* Tn = KP559016994 * (T4 - T7); */
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
Tn = round_fx(L_tmp);
/* first output: sum of all five inputs */
zRe[i0] = add(T1,T8);
move16();
zIm[i0] = add(To,Tp);
move16();
/* T2 = KP951056516*Te + KP587785252*Th; */
L_tmp = Mult_32_16(KP951056516_16FX,Te);
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Th);
T2 = round_fx(L_tmp);
/*T3 = KP951056516*Th - KP587785252*Te; */
L_tmp = Mult_32_16(KP951056516_16FX,Th);
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Te);
T3 = round_fx(L_tmp);
/* T6 = T1 - T8/4 */
T6 = sub(T1,shr(T8,2));
T4 = add(T9,T6);
T5 = sub(T6,T9);
/* real outputs; note the store order i1, i2, i4, i3 */
zRe[i1] = sub(T4,T2);
move16();
zRe[i2] = add(T5,T3);
move16();
zRe[i4] = add(T4,T2);
move16();
zRe[i3] = sub(T5,T3);
move16();
/* T2 = KP951056516 * Ts + KP587785252 * Tt; */
L_tmp = Mult_32_16(KP951056516_16FX,Ts);
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Tt);
T2 = round_fx(L_tmp);
/* T3 = KP951056516 * Tt - KP587785252 * Ts; */
L_tmp = Mult_32_16(KP951056516_16FX,Tt);
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Ts);
T3 = round_fx(L_tmp);
/* T6 = To - Tp/4 */
T6 = sub(To,shr(Tp,2));
T4 = add(Tn,T6);
T5 = sub(T6,Tn);
/* imaginary outputs */
zIm[i4] = sub(T4,T2);
move16();
zIm[i2] = sub(T5,T3);
move16();
zIm[i1] = add(T2,T4);
move16();
zIm[i3] = add(T3,T5);
move16();
return;
}
/*-----------------------------------------------------------------*
* fft5_32()
* 5-point FFT called for 32 times
*
* Operates in place on the five complex values addressed through
* Idx[0], Idx[32], Idx[64], Idx[96] and Idx[128]. All data are
* Word16, so every store is accounted with move16(), matching
* fft5_shift4_16fx().
*-----------------------------------------------------------------*/
static void fft5_32_16fx(
    Word16 *zRe,       /* i/o : real part of input and output data */
    Word16 *zIm,       /* i/o : imaginary part of input and output data */
    const Word16 *Idx  /* i   : pointer of the address table */
)
{
    Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn,T2, T3, T4, T5, T6, T7;
    Word16 i0,i1,i2,i3,i4;
    Word32 L_tmp;

    /* fetch the five data positions from the address table */
    i0 = Idx[0];
    move16();
    i1 = Idx[32];
    move16();
    i2 = Idx[64];
    move16();
    i3 = Idx[96];
    move16();
    i4 = Idx[128];
    move16();

    /* real parts: pair sums/differences of (i1,i4) and (i2,i3) */
    T1 = zRe[i0];
    move16();
    To = zIm[i0];
    move16();
    T2 = zRe[i1];
    move16();
    T3 = zRe[i4];
    move16();
    T4 = add(T2, T3);
    T5 = zRe[i2];
    move16();
    T6 = zRe[i3];
    move16();
    T7 = add(T5,T6);
    T8 = add(T4,T7);
    Tt = sub(T5,T6);
    /* T9 = KP559016994 * (T4 - T7); */
    L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
    T9 = round_fx(L_tmp);
    Ts = sub(T2,T3);

    /* imaginary parts: same combinations (T2..T7 are reused as scratch) */
    T2 = zIm[i1];
    move16();
    T3 = zIm[i4];
    move16();
    T4 = add(T2,T3);
    T5 = zIm[i2];
    move16();
    T6 = zIm[i3];
    move16();
    T7 = add(T5,T6);
    Te = sub(T2,T3);
    Tp = add(T4,T7);
    Th = sub(T5,T6);
    /* Tn = KP559016994 * (T4 - T7); */
    L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
    Tn = round_fx(L_tmp);

    /* first output: sum of all five inputs */
    zRe[i0] = add(T1,T8);
    move16();
    zIm[i0] = add(To,Tp);
    move16();

    /*T2 = KP951056516*Te + KP587785252*Th; */
    L_tmp = Mult_32_16(KP951056516_16FX,Te);
    L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Th);
    T2 = round_fx(L_tmp);
    /*T3 = KP951056516*Th - KP587785252*Te; */
    L_tmp = Mult_32_16(KP951056516_16FX,Th);
    L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Te);
    T3 = round_fx(L_tmp);
    /* T6 = T1 - T8/4 */
    T6 = sub(T1,shr(T8,2));
    T4 = add(T9,T6);
    T5 = sub(T6,T9);

    /* real outputs; note the store order i3, i1, i2, i4 */
    zRe[i3] = sub(T4,T2);
    move16();
    zRe[i1] = add(T5,T3);
    move16();
    zRe[i2] = add(T4,T2);
    move16();
    zRe[i4] = sub(T5,T3);
    move16();

    /* T2 = KP951056516 * Ts + KP587785252 * Tt; */
    L_tmp = Mult_32_16(KP951056516_16FX,Ts);
    L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Tt);
    T2 = round_fx(L_tmp);
    /* T3 = KP951056516 * Tt - KP587785252 * Ts; */
    L_tmp = Mult_32_16(KP951056516_16FX,Tt);
    L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Ts);
    T3 = round_fx(L_tmp);
    /* T6 = To - Tp/4 */
    T6 = sub(To,shr(Tp,2));
    T4 = add(Tn,T6);
    T5 = sub(T6,Tn);

    /* imaginary outputs */
    zIm[i2] = sub(T4,T2);
    move16();
    zIm[i1] = sub(T5,T3);
    move16();
    zIm[i3] = add(T2,T4);
    move16();
    zIm[i4] = add(T3,T5);
    move16();
    return;
}
/*-----------------------------------------------------------------*
* fft64()
* 64-point FFT
*
* Gathers 64 complex values through Idx[] into an interleaved local
* buffer, transforms them with cdftForw_16fx(), and scatters the
* result back through Idx[Odx_fft64_16fx[]].
*-----------------------------------------------------------------*/
static void fft64_16fx(
    Word16 *x,         /* i/o : real part of input and output data */
    Word16 *y,         /* i/o : imaginary part of input and output data */
    const Word16 *Idx  /* i   : pointer of the address table */
)
{
    Word16 k, pos, perm;
    Word16 buf[128];
    Word16 *cur;

    /* gather */
    move16();/*penalty for 1 ptr init */
    cur = buf;
    FOR ( k=0; k<64; k++ )
    {
        pos = Idx[k];
        move16();
        *cur++ = x[pos];
        move16();
        *cur++ = y[pos];
        move16();
    }

    cdftForw_16fx(128,buf,Ip_fft64_16fx,w_fft64_16fx);

    /* scatter through the output permutation */
    move16();/*penalty for 1 ptr init */
    cur = buf;
    FOR ( k=0; k<64; k++ )
    {
        perm = Odx_fft64_16fx[k];
        move16();
        pos = Idx[perm];
        move16();
        x[pos] = *cur++;
        move16();
        y[pos] = *cur++;
        move16();
    }
    return;
}
/*-----------------------------------------------------------------*
* fft32_5()
* 32-point FFT called for 5 times
*
* Gathers 32 complex values through Idx[] into an interleaved local
* buffer, transforms them with cdftForw_16fx(), and scatters the
* result back through Idx[Odx_fft32_5[]].
*-----------------------------------------------------------------*/
static void fft32_5_16fx(
    Word16 *x,         /* i/o : real part of input and output data */
    Word16 *y,         /* i/o : imaginary part of input and output data */
    const Word16 *Idx  /* i   : pointer of the address table */
)
{
    Word16 k, pos, perm;
    Word16 buf[64];
    Word16 *cur;

    /* gather */
    move16();/*penalty for 1 ptr init */
    cur = buf;
    FOR ( k=0; k<32; k++ )
    {
        pos = Idx[k];
        move16();
        *cur++ = x[pos];
        move16();
        *cur++ = y[pos];
        move16();
    }

    cdftForw_16fx(64,buf,Ip_fft32_16fx,w_fft32_16fx);

    /* scatter through the output permutation */
    move16();/*penalty for 1 ptr init */
    cur = buf;
    FOR ( k=0; k<32; k++ )
    {
        perm = Odx_fft32_5[k];
        move16();
        pos = Idx[perm];
        move16();
        x[pos] = *cur++;
        move16();
        y[pos] = *cur++;
        move16();
    }
    return;
}
/*-----------------------------------------------------------------*
* DoRTFT160()
* a low complexity 2-dimensional DFT of 160 points
* (5 x 32-point FFT followed by 32 x 5-point FFT, both addressed
* through Idx_dortft160)
*-----------------------------------------------------------------*/
void DoRTFT160_16fx(
    Word16 x[],  /* i/o : real part of input and output data */
    Word16 y[]   /* i/o : imaginary part of input and output data */
)
{
    Word16 pass;

    /* five 32-point FFTs; pass p starts at table offset 32*p */
    FOR (pass = 0; pass < 5; pass++)
    {
        fft32_5_16fx(x, y, &Idx_dortft160[shl(pass, 5)]);
    }

    /* thirty-two 5-point FFTs; pass p starts at table offset p */
    FOR (pass = 0; pass < 32; pass++)
    {
        fft5_32_16fx(x, y, &Idx_dortft160[pass]);
    }
    return;
}
/*-----------------------------------------------------------------*
* DoRTFT320()
* a low complexity 2-dimensional DFT of 320 points
* (5 x 64-point FFT followed by 64 x 5-point FFT, both addressed
* through Idx_dortft320_16fx)
*-----------------------------------------------------------------*/
void DoRTFT320_16fx(
    Word16 *x,  /* i/o : real part of input and output data */
    Word16 *y   /* i/o : imaginary part of input and output data */
)
{
    Word16 pass;

    /* five 64-point FFTs; pass p starts at table offset 64*p */
    FOR (pass = 0; pass < 5; pass++)
    {
        fft64_16fx(x, y, &Idx_dortft320_16fx[shl(pass, 6)]);
    }

    /* sixty-four 5-point FFTs with stride 64; pass p starts at table offset p */
    FOR (pass = 0; pass < 64; pass++)
    {
        fft5_shift4_16fx(64, x, y, &Idx_dortft320_16fx[pass]);
    }
    return;
}
/*-----------------------------------------------------------------*
* DoRTFT128()
* FFT with 128 points
*
* Interleaves x/y into a local buffer, transforms it with
* cdftForw_16fx(), and writes bin 0 in place and bins 1..127 to
* positions 127..1.
*-----------------------------------------------------------------*/
void DoRTFT128_16fx(
    Word16 *x,  /* i/o : real part of input and output data Q(Qx+Q_edct)*/
    Word16 *y   /* i/o : imaginary part of input and output data Q(Qx+Q_edct)*/
)
{
    Word16 k;
    Word16 buf[256];
    Word16 *cur;

    /* interleave */
    move16();/*penalty for 1 ptr init */
    cur = buf;
    FOR ( k=0; k<128; k++ )
    {
        *cur++ = x[k];
        move16();
        *cur++ = y[k];
        move16();
    }

    cdftForw_16fx(256,buf,Ip_fft128_16fx,w_fft128_16fx);

    /* bin 0 keeps its position */
    x[0]=buf[0];
    move16();
    y[0]=buf[1];
    move16();

    /* remaining bins are stored in reversed order */
    move16();/*penalty for 1 ptr init */
    move16();/*penalty for 1 ptr init */
    cur = &buf[2];
    FOR ( k=127; k>=1; k-- )
    {
        x[k] = *cur++;
        move16();
        y[k] = *cur++;
        move16();
    }
    return;
}
/*-----------------------------------------------------------------*
* cdftForw()
* Main function of the Complex Discrete Fourier Transform (Word16 data):
* reorders a[] in place with bitrv2_SR_16fx(), then runs the FFT
* stages with cftfsub_16fx().
*-----------------------------------------------------------------*/
static void cdftForw_16fx(
Word16 n, /* i : data length of real and imag (callers pass 2 * FFT size, data interleaved) */
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
const Word16 *ip, /* i : work area for bit reversal */
const Word32 *w /* i : cos/sin table Q30*/
)
{
/* bit reversal */
/* the first two entries of ip are skipped; the reordering table starts at ip[2] */
bitrv2_SR_16fx(n, ip + 2, a);
/* Do FFT */
cftfsub_16fx(n, a, w);
}
/*-----------------------------------------------------------------*
* swap_cplx_16fx()
* Exchange the two-entry (real, imag) pairs starting at a[j1] and a[k1]
*-----------------------------------------------------------------*/
static void swap_cplx_16fx(
    Word16 *a,        /* i/o : interleaved complex data */
    const Word16 j1,  /* i   : index of the first pair */
    const Word16 k1   /* i   : index of the second pair */
)
{
    Word16 xr, xi, yr, yi;

    xr = a[j1];
    move16();
    xi = a[j1 + 1];
    move16();
    yr = a[k1];
    move16();
    yi = a[k1 + 1];
    move16();
    a[j1] = yr;
    move16();
    a[j1 + 1] = yi;
    move16();
    a[k1] = xr;
    move16();
    a[k1 + 1] = xi;
    move16();
}
/*-----------------------------------------------------------------*
* bitrv2_SR()
* Bit reversal: in-place reordering of the interleaved complex data
* a[] driven by the index table ip[].
*-----------------------------------------------------------------*/
static void bitrv2_SR_16fx(
    Word16 n,          /* i   : data length of real and imag */
    const Word16 *ip,  /* i   : work area for bit reversal (read only) */
    Word16 *a          /* i/o : input/output data Q(Qx+Q_edct)*/
)
{
    Word16 j, j1, k, k1, m, m2;
    Word16 l;

    /* Halve l and double m until 8*m >= l */
    l = n;
    move16();
    m = 1;
    move16();
    WHILE (sub(shl(m,3),l) < 0)
    {
        l = shr(l,1);
        m = shl(m,1);
    }
    m2 = shl(m,1);

    IF (sub(shl(m, 3),l) == 0)
    {
        FOR (k = 0; k < m; k++)
        {
            FOR (j = 0; j < k; j++)
            {
                /* four swaps per (j,k) pair, offsets stepping by m2 / 2*m2 */
                j1 = add(shl(j,1),ip[k]);
                k1 = add(shl(k,1),ip[j]);
                swap_cplx_16fx(a, j1, k1);

                j1 = add(j1,m2);
                k1 = add(k1,shl(m2,1));
                swap_cplx_16fx(a, j1, k1);

                j1 = add(j1,m2);
                k1 = sub(k1,m2);
                swap_cplx_16fx(a, j1, k1);

                j1 = add(j1,m2);
                k1 = add(k1,shl(m2,1));
                swap_cplx_16fx(a, j1, k1);
            }
            /* extra swap for each k, including k == 0 */
            j1 = add(add(shl(k,1),m2),ip[k]);
            k1 = add(j1,m2);
            swap_cplx_16fx(a, j1, k1);
        }
    }
    ELSE
    {
        FOR (k = 1; k < m; k++)
        {
            FOR (j = 0; j < k; j++)
            {
                /* two swaps per (j,k) pair, both offsets stepping by m2 */
                j1 = add(shl(j,1),ip[k]);
                k1 = add(shl(k,1),ip[j]);
                swap_cplx_16fx(a, j1, k1);

                j1 = add(j1,m2);
                k1 = add(k1,m2);
                swap_cplx_16fx(a, j1, k1);
            }
        }
    }
    return;
}
/*-----------------------------------------------------------------*
* cftfsub()
* Complex Discrete Fourier Transform (forward) on Word16 data:
* runs cft1st_16fx() / cftmdl_16fx() for n > 8, then one final
* multiplier-free pass (4-point or 2-point butterflies).
*-----------------------------------------------------------------*/
static void cftfsub_16fx(
    Word16 n,        /* i   : data length of real and imag */
    Word16 *a,       /* i/o : input/output data Q(Qx+Q_edct)*/
    const Word32 *w  /* i   : cos/sin table Q30*/
)
{
    Word16 p0, p1, p2, p3, span;
    Word16 s01r, s01i, d01r, d01i;
    Word16 s23r, s23i, d23r, d23i;

    span = 2;
    move16();
    IF (sub(n,8) > 0)
    {
        cft1st_16fx(n, a, w);
        span = 8;
        move16();
        WHILE (sub(shl(span, 2),n) < 0)
        {
            cftmdl_16fx(n, span, a, w);
            span = shl(span,2);
        }
    }

    IF (sub(shl(span,2),n) == 0)
    {
        /* final pass: 4-point butterflies over entries span apart */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0,span);
            p2 = add(p1,span);
            p3 = add(p2,span);

            s01r = add(a[p0],a[p1]);
            s01i = add(a[p0 + 1],a[p1 + 1]);
            d01r = sub(a[p0],a[p1]);
            d01i = sub(a[p0 + 1],a[p1 + 1]);
            s23r = add(a[p2],a[p3]);
            s23i = add(a[p2 + 1],a[p3 + 1]);
            d23r = sub(a[p2],a[p3]);
            d23i = sub(a[p2 + 1],a[p3 + 1]);

            a[p0] = add(s01r,s23r);
            move16();
            a[p0 + 1] = add(s01i,s23i);
            move16();
            a[p2] = sub(s01r,s23r);
            move16();
            a[p2 + 1] = sub(s01i,s23i);
            move16();
            a[p1] = sub(d01r,d23i);
            move16();
            a[p1 + 1] = add(d01i,d23r);
            move16();
            a[p3] = add(d01r,d23i);
            move16();
            a[p3 + 1] = sub(d01i,d23r);
            move16();
        }
    }
    ELSE
    {
        /* final pass: 2-point butterflies */
        FOR (p0 = 0; p0 < span; p0 += 2)
        {
            p1 = add(p0,span);
            d01r = sub(a[p0],a[p1]);
            d01i = sub(a[p0 + 1],a[p1 + 1]);
            a[p0] = add(a[p0],a[p1]);
            move16();
            a[p0 + 1] = add(a[p0 + 1],a[p1 + 1]);
            move16();
            a[p1] = d01r;
            move16();
            a[p1 + 1] = d01i;
            move16();
        }
    }
    return;
}
/*-----------------------------------------------------------------*
* cft1st_16fx()
* Subfunction of Complex Discrete Fourier Transform
*
* First pass over the data, performed in place on a[].  Values are
* handled as (a[k], a[k+1]) pairs named "r"/"i" below, four pairs
* (8 Word16 values) per butterfly:
*   - a[0..7]   : butterfly without any multiplication,
*   - a[8..15]  : butterfly using the single factor w[2],
*   - a[16..n-1]: per group of 16 values, two butterflies using
*                 factors read from w[] plus a derived third factor.
* a[0..15] and w[2] are accessed unconditionally, so the caller must
* supply at least 16 values (the visible caller only enters here
* when n > 8).
*-----------------------------------------------------------------*/
static void cft1st_16fx(
Word16 n, /* i : data length of real and imag */
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
const Word32 *w /* i : cos/sin table Q30*/
)
{
Word16 j, k1, k2;
Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
Word16 tmp;
Word32 L_tmp;
/* ---- group a[0..7]: sums/differences only, no factor applied ---- */
x0r = add(a[0],a[2]);
x0i = add(a[1],a[3]);
x1r = sub(a[0],a[2]);
x1i = sub(a[1],a[3]);
x2r = add(a[4],a[6]);
x2i = add(a[5],a[7]);
x3r = sub(a[4],a[6]);
x3i = sub(a[5],a[7]);
a[0] = add(x0r,x2r);
move16();
a[1] = add(x0i,x2i);
move16();
a[4] = sub(x0r,x2r);
move16();
a[5] = sub(x0i,x2i);
move16();
a[2] = sub(x1r,x3i);
move16();
a[3] = add(x1i,x3r);
move16();
a[6] = add(x1r,x3i);
move16();
a[7] = sub(x1i,x3r);
move16();
/* ---- group a[8..15]: only the factor w[2] is needed ---- */
wk1r = w[2];
move32();
x0r = add(a[8],a[10]);
x0i = add(a[9],a[11]);
x1r = sub(a[8],a[10]);
x1i = sub(a[9],a[11]);
x2r = add(a[12],a[14]);
x2i = add(a[13],a[15]);
x3r = sub(a[12],a[14]);
x3i = sub(a[13],a[15]);
a[8] = add(x0r,x2r);
move16();
a[9] = add(x0i,x2i);
move16();
/* (x0 - x2) with real and imaginary parts swapped and the real part negated, */
/* i.e. (x0 - x2) multiplied by j; no multiplier is needed for this output.   */
a[12] = sub(x2i,x0i);
move16();
a[13] = sub(x0r,x2r);
move16();
/* Outputs 10/11: wk1r * (x0r - x0i) and wk1r * (x0r + x0i). */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
tmp = sub(x0r,x0i);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[10] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = add(x0r,x0i);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[11] = round_fx(L_shl(L_tmp,1)); /* Q(Qx+Q_edct) */
/* Outputs 14/15: wk1r * (x0i - x0r) and wk1r * (x0i + x0r). */
x0r = add(x3i,x1r);
x0i = sub(x3r,x1i);
tmp = sub(x0i,x0r);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[14] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = add(x0i,x0r);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[15] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* ---- remaining groups of 16 values; k1 advances by 2 per group ---- */
k1 = 0;
move16();
FOR (j = 16; j < n; j += 16)
{
k1 = add(k1,2);
k2 = shl(k1,1);
/* Factors for the first half of the group (a[j..j+7]). */
wk2r = L_add(0,w[k1]);
wk2i = L_add(0,w[k1 + 1]);
wk1r = L_add(0,w[k2]);
wk1i = L_add(0,w[k2 + 1]);
/* Third factor derived from the other two; going by the Q annotations: */
/* wk3r = wk1r - 2*wk2i*wk1i, wk3i = 2*wk2i*wk1r - wk1i.                */
/* NOTE(review): exact scaling depends on Mult_32_32, not visible here. */
L_tmp = L_shl(Mult_32_32(wk2i,wk1i),1);/*Q29 */
wk3r = L_sub(wk1r,L_shl(L_tmp,1));/*Q30 */
L_tmp = L_shl(Mult_32_32(wk2i,wk1r),1);/*Q29 */
wk3i = L_sub(L_shl(L_tmp,1),wk1i);/*Q30 */
x0r = add(a[j],a[j + 2]);
x0i = add(a[j + 1],a[j + 3]);
x1r = sub(a[j],a[j + 2]);
x1i = sub(a[j + 1],a[j + 3]);
x2r = add(a[j + 4],a[j + 6]);
x2i = add(a[j + 5],a[j + 7]);
x3r = sub(a[j + 4],a[j + 6]);
x3i = sub(a[j + 5],a[j + 7]);
a[j] = add(x0r,x2r);
move16();
a[j + 1] = add(x0i,x2i);
move16();
/* a[j+4], a[j+5] = (x0 - x2) * (wk2r + j*wk2i) */
x0r = sub(x0r,x2r);
x0i = sub(x0i,x2i);
L_tmp = Mult_32_16(wk2r,x0r);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk2i,x0i); /*Q(15+Qx+Q_edct) */
a[j + 4] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk2r,x0i);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk2i,x0r); /*Q(15+Qx+Q_edct) */
a[j + 5] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* a[j+2], a[j+3] = (x1r - x3i, x1i + x3r) * (wk1r + j*wk1i) */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
a[j + 2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
a[j + 3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* a[j+6], a[j+7] = (x1r + x3i, x1i - x3r) * (wk3r + j*wk3i) */
x0r = add(x1r,x3i);
x0i = sub(x1i,x3r);
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
a[j + 6] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
a[j + 7] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Factors for the second half of the group (a[j+8..j+15]); wk2r/wk2i */
/* are reused, wk1 is reloaded and wk3 is derived from wk2r this time. */
wk1r = L_add(0,w[k2 + 2]);
wk1i = L_add(0,w[k2 + 3]);
L_tmp = L_shl(Mult_32_32(wk2r,wk1i),1);/*Q29 */
wk3r = L_sub(wk1r,L_shl(L_tmp,1)); /*Q30 */
L_tmp = L_shl(Mult_32_32(wk2r,wk1r),1);/*Q29 */
wk3i = L_sub(L_shl(L_tmp,1),wk1i); /*Q30 */
x0r = add(a[j + 8],a[j + 10]);
x0i = add(a[j + 9],a[j + 11]);
x1r = sub(a[j + 8],a[j + 10]);
x1i = sub(a[j + 9],a[j + 11]);
x2r = add(a[j + 12],a[j + 14]);
x2i = add(a[j + 13],a[j + 15]);
x3r = sub(a[j + 12],a[j + 14]);
x3i = sub(a[j + 13],a[j + 15]);
a[j + 8] = add(x0r,x2r);
move16();
a[j + 9] = add(x0i,x2i);
move16();
/* a[j+12] = -wk2i*x0r - wk2r*x0i, a[j+13] = -wk2i*x0i + wk2r*x0r */
x0r = sub(x0r,x2r);
x0i = sub(x0i,x2i);
tmp = negate(x0r);
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk2r,x0i); /*Q(15+Qx+Q_edct) */
a[j + 12] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = negate(x0i);
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk2r,x0r); /*Q(15+Qx+Q_edct) */
a[j + 13] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* a[j+10], a[j+11] = (x1r - x3i, x1i + x3r) * (wk1r + j*wk1i) */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
a[j + 10] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
a[j + 11] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* a[j+14], a[j+15] = (x1r + x3i, x1i - x3r) * (wk3r + j*wk3i) */
x0r = add(x1r,x3i);
x0i = sub(x1i,x3r);
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
a[j + 14] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
a[j + 15] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
}
return;
}
/*-----------------------------------------------------------------*
* cftmdl_16fx()
* Subfunction of Complex Discrete Fourier Transform
*
* One intermediate pass, performed in place on a[].  Each butterfly
* combines four (r,i) pairs located l values apart: a[j], a[j+l],
* a[j+2l], a[j+3l].  With m = 4*l the data is processed as:
*   - block [0, m)     : butterflies without multiplication,
*   - block [m, 2m)    : butterflies using the single factor w[2],
*   - every further 2m : two blocks of butterflies ([k, k+m) and
*                        [k+m, k+2m)) with factors read from w[]
*                        and a derived third factor.
*-----------------------------------------------------------------*/
static void cftmdl_16fx(
Word16 n, /* i : data length of real and imag */
Word16 l, /* i : initial shift for processing */
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
const Word32 *w /* i : cos/sin table Q30*/
)
{
Word16 j, j1, j2, j3, k, k1, k2, m, m2;
Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
Word16 tmp, tmp2;
Word32 L_tmp;
Word32 L_x0r, L_x0i;
/* m: number of values covered by one butterfly block (4 legs of length l). */
m = shl(l, 2);
move16();
/* ---- block [0, m): sums/differences only ---- */
FOR (j = 0; j < l; j += 2)
{
j1 = add(j,l);
j2 = add(j1,l);
j3 = add(j2,l);
x0r = add(a[j],a[j1]);
x0i = add(a[j + 1],a[j1 + 1]);
x1r = sub(a[j],a[j1]);
x1i = sub(a[j + 1],a[j1 + 1]);
x2r = add(a[j2],a[j3]);
x2i = add(a[j2 + 1],a[j3 + 1]);
x3r = sub(a[j2],a[j3]);
x3i = sub(a[j2 + 1],a[j3 + 1]);
a[j] = add(x0r,x2r);
move16();
a[j + 1] = add(x0i,x2i);
move16();
a[j2] = sub(x0r,x2r);
move16();
a[j2 + 1] = sub(x0i,x2i);
move16();
a[j1] = sub(x1r,x3i);
move16();
a[j1 + 1] = add(x1i,x3r);
move16();
a[j3] = add(x1r,x3i);
move16();
a[j3 + 1] = sub(x1i,x3r);
move16();
}
/* ---- block [m, 2m): only the factor w[2] is needed ---- */
wk1r = w[2];
move32();
tmp2 = add(l,m);
FOR (j = m; j < tmp2; j += 2)
{
j1 = add(j,l);
j2 = add(j1,l);
j3 = add(j2,l);
x0r = add(a[j],a[j1]);
x0i = add(a[j + 1],a[j1 + 1]);
x1r = sub(a[j],a[j1]);
x1i = sub(a[j + 1],a[j1 + 1]);
x2r = add(a[j2],a[j3]);
x2i = add(a[j2 + 1],a[j3 + 1]);
x3r = sub(a[j2],a[j3]);
x3i = sub(a[j2 + 1],a[j3 + 1]);
a[j] = add(x0r,x2r);
move16();
a[j + 1] = add(x0i,x2i);
move16();
/* (x0 - x2) multiplied by j: parts swapped, real part negated. */
a[j2] = sub(x2i,x0i);
move16();
a[j2 + 1] = sub(x0r,x2r);
move16();
/* Leg j1: wk1r * (x0r - x0i) and wk1r * (x0r + x0i). */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
tmp = sub(x0r,x0i);
L_tmp = Mult_32_16(wk1r,tmp);/*Q(15+Qx+Q_edct) */
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = add(x0r,x0i);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Leg j3: wk1r * (x0i - x0r) and wk1r * (x0i + x0r). */
x0r = add(x3i,x1r);
x0i = sub(x3r,x1i);
tmp = sub(x0i,x0r);
L_tmp = Mult_32_16(wk1r,tmp);/*Q(15+Qx+Q_edct) */
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = add(x0i,x0r);
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
}
/* ---- remaining data in steps of 2*m; k1 advances by 2 per step ---- */
k1 = 0;
move16();
m2 = shl(m,1);
FOR (k = m2; k < n; k += m2)
{
k1 = add(k1,2);
k2 = shl(k1,1);
/* Factors for the block starting at k. */
wk2r = L_add(0,w[k1]);
wk2i = L_add(0,w[k1 + 1]);
wk1r = L_add(0,w[k2]);
wk1i = L_add(0,w[k2 + 1]);
/* Third factor derived from the other two; going by the Q annotations: */
/* wk3r = wk1r - 2*wk2i*wk1i, wk3i = 2*wk2i*wk1r - wk1i.                */
/* NOTE(review): exact scaling depends on Mult_32_32, not visible here. */
L_tmp = L_shl(Mult_32_32(wk2i,wk1i),1);/*Q29 */
wk3r = L_sub(wk1r,L_shl(L_tmp,1));/*Q30 */
L_tmp = L_shl(Mult_32_32(wk2i,wk1r),1);/*Q29 */
wk3i = L_sub(L_shl(L_tmp,1),wk1i);/*Q30 */
tmp2 = add(l,k);
FOR (j = k; j < tmp2; j += 2)
{
j1 = add(j,l);
j2 = add(j1,l);
j3 = add(j2,l);
x0r = add(a[j],a[j1]);
x0i = add(a[j + 1],a[j1 + 1]);
x1r = sub(a[j],a[j1]);
x1i = sub(a[j + 1],a[j1 + 1]);
x2r = add(a[j2],a[j3]);
x2i = add(a[j2 + 1],a[j3 + 1]);
x3r = sub(a[j2],a[j3]);
x3i = sub(a[j2 + 1],a[j3 + 1]);
a[j] = add(x0r,x2r);
move16();
a[j + 1] = add(x0i,x2i);
move16();
/* Leg j2 = (x0 - x2) * (wk2r + j*wk2i) */
x0r = sub(x0r,x2r);
x0i = sub(x0i,x2i);
L_tmp = Mult_32_16(wk2r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk2i,x0i); /*Q(15+Qx+Q_edct) */
a[j2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk2r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk2i,x0r); /*Q(15+Qx+Q_edct) */
a[j2 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Leg j1 = (x1r - x3i, x1i + x3r) * (wk1r + j*wk1i) */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
L_tmp = Mult_32_16(wk1r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk1r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Leg j3 input (x1r + x3i, x1i - x3r) is formed in 32 bits and then  */
/* reduced with extract_l, unlike the matching step of the next loop, */
/* which uses 16-bit add()/sub().                                      */
/* NOTE(review): presumably deliberate (different overflow handling   */
/* than add/sub) - confirm against the reference before touching.     */
L_x0r = L_add((Word32) x1r, (Word32) x3i);
L_x0i = L_sub((Word32) x1i, (Word32) x3r);
x0r = extract_l(L_x0r);
x0i = extract_l(L_x0i);
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
}
/* Factors for the block starting at k + m: wk2r/wk2i are reused, */
/* wk1 is reloaded and wk3 is derived from wk2r this time.        */
wk1r = w[k2 + 2];
move32();
wk1i = w[k2 + 3];
move32();
L_tmp = L_shl(Mult_32_32(wk2r,wk1i),1);/*Q29 */
wk3r = L_sub(wk1r,L_shl(L_tmp,1)); /*Q30 */
L_tmp = L_shl(Mult_32_32(wk2r,wk1r),1);/*Q29 */
wk3i = L_sub(L_shl(L_tmp,1),wk1i); /*Q30 */
tmp2 = add(l,add(k,m));
FOR (j = add(k,m); j < tmp2; j += 2)
{
j1 = add(j,l);
j2 = add(j1,l);
j3 = add(j2,l);
x0r = add(a[j],a[j1]);
x0i = add(a[j + 1],a[j1 + 1]);
x1r = sub(a[j],a[j1]);
x1i = sub(a[j + 1],a[j1 + 1]);
x2r = add(a[j2],a[j3]);
x2i = add(a[j2 + 1],a[j3 + 1]);
x3r = sub(a[j2],a[j3]);
x3i = sub(a[j2 + 1],a[j3 + 1]);
a[j] = add(x0r,x2r);
move16();
a[j + 1] = add(x0i,x2i);
move16();
/* Leg j2: a[j2] = -wk2i*x0r - wk2r*x0i, a[j2+1] = -wk2i*x0i + wk2r*x0r */
x0r = sub(x0r,x2r);
x0i = sub(x0i,x2i);
tmp = negate(x0r);
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk2r,x0i); /*Q(15+Qx+Q_edct) */
a[j2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
tmp = negate(x0i);
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk2r,x0r); /*Q(15+Qx+Q_edct) */
a[j2 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Leg j1 = (x1r - x3i, x1i + x3r) * (wk1r + j*wk1i) */
x0r = sub(x1r,x3i);
x0i = add(x1i,x3r);
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
/* Leg j3 = (x1r + x3i, x1i - x3r) * (wk3r + j*wk3i) */
x0r = add(x1r,x3i);
x0i = sub(x1i,x3r);
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
}
}
return;
}
/*-----------------------------------------------------------------*
* fft3_fx()
*
* Transform of length n = 3*m built from three length-m transforms.
*   1. X[] is split into the three sequences X[3i], X[3i+1], X[3i+2]
*      (m samples each), stored back to back in the local buffer Z.
*   2. r_fft_fx_lc() is run in place on each of the three sequences.
*   3. The three results are combined by order-3 butterflies using
*      the table sincos_t_rad3_fx and written to Y[].
*
* X : i : input, 3*m values are read
* Y : o : output; "R" values are written upwards from Y[0], "I"
*         values downwards from Y[n-1]
* n : i : transform length; 1536 selects m = 512, every other value
*         (384 and the default branch alike) selects m = 128
*
* NOTE(review): Z has PH_ECU_SPEC_SIZE entries and 3*m of them are
* used - presumably PH_ECU_SPEC_SIZE >= 1536; confirm.
* NOTE(review): the combination stage reads each length-m result
* with an "R" pointer starting at index 0 and an "I" pointer starting
* at index m-1, which presumably mirrors the output layout of
* r_fft_fx_lc(); that routine is not visible here.
*-----------------------------------------------------------------*/
void fft3_fx(const Word16 X[], Word16 Y[], const Word16 n)
{
Word16 Z[PH_ECU_SPEC_SIZE];
Word16 *Z0, *Z1, *Z2;
Word16 *z0, *z1, *z2;
const Word16 *x;
const Word16 *t_sin = sincos_t_rad3_fx;
Word16 m, mMinus1, step;
Word16 i, l;
Word16 c1_ind, s1_ind, c2_ind, s2_ind;
Word16 c1_step, s1_step, c2_step, s2_step;
Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
Word32 acc;
Word16 mBy2, orderMinus1;
const Word16 *pPhaseTbl;
/* Determine the order of the transform, the length of decimated */
/* transforms m, and the step for the sine and cosine tables. */
SWITCH(n)
{
case 1536:
orderMinus1 = 9-1;
move16();
m = 512;
move16();
step = 1;
move16();
pPhaseTbl = FFT_W256;
BREAK;
case 384:
orderMinus1 = 7-1;
move16();
m = 128;
move16();
step = 4;
move16();
pPhaseTbl = FFT_W64;
BREAK;
default:
/* Any other length falls back to the same setup as n = 384. */
orderMinus1 = 7-1;
move16();
m = 128;
move16();
step = 4;
move16();
pPhaseTbl = FFT_W64;
BREAK;
}
/* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */
/* compute their FFT of length m. */
Z0 = &Z[0];
z0 = &Z0[0];
Z1 = &Z0[m];
z1 = &Z1[0]; /* Z1 = &Z[ m]; */
Z2 = &Z1[m];
z2 = &Z2[0]; /* Z2 = &Z[2m]; */
x = &X[0];
FOR (i = 0; i < m; i++)
{
*z0++ = *x++; /* Z0[i] = X[3i]; */ move16();
*z1++ = *x++; /* Z1[i] = X[3i+1]; */ move16();
*z2++ = *x++; /* Z2[i] = X[3i+2]; */ move16();
}
mBy2 = shr(m,1);
/* In-place length-m transforms; last argument is 1 here (ifft3_fx passes 0 for the inverse). */
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, Z0, Z0, 1);
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, Z1, Z1, 1);
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, Z2, Z2, 1);
/* Butterflies of order 3. */
/* pointer initialization */
mMinus1 = sub(m,1);
RY = &Y[0];
IY = &Y[n];
IY--; /* Decrement the address counter.*/
RZ0 = &Z0[0];
IZ0 = &Z0[mMinus1];
RZ1 = &Z1[0];
IZ1 = &Z1[mMinus1];
RZ2 = &Z2[0];
IZ2 = &Z2[mMinus1];
/* Table indices: the "c" indices start near T_SIN_PI_2 and move in the   */
/* opposite direction to the "s" indices; the "2" pair moves twice as far */
/* per bin as the "1" pair.  The add/sub direction of each index is       */
/* switched from segment to segment below, together with the mac/msu      */
/* signs of the corresponding products.                                   */
c1_step = negate(step);
s1_step = step;
move16();
c2_step = shl(c1_step,1);
s2_step = shl(s1_step,1);
c1_ind = add(T_SIN_PI_2, c1_step);
s1_ind = s1_step;
move16();
c2_ind = add(T_SIN_PI_2, c2_step);
s2_ind = s2_step;
move16();
/* special case: i = 0 */
/* Plain sum of the three first values, each weighted by 0x4000. */
acc = L_mult(*RZ0++, 0x4000);
acc = L_mac(acc, *RZ1++, 0x4000);
*RY++ = mac_r(acc, *RZ2++, 0x4000);
move16();
/* first 3/12-- from 1 to (3*m/8)-1 */
l = sub(shr(n, 3),1); /* (3*m/8) - 1 = (n/8) - 1 */
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0++, 15); /* Align with the following non-fractional mode so as to gain 1 more bit headroom. */
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc); /* bit growth = 1 (compensated by non-fractional mode MAC). */
acc = L_shl(*IZ0--, 15);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2++, t_sin[s2_ind]);
acc = L_mac0(acc, *IZ2--, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = add(c1_ind, c1_step);
s1_ind = add(s1_ind, s1_step);
c2_ind = add(c2_ind, c2_step);
s2_ind = add(s2_ind, s2_step);
}
/* next 1/12-- from (3*m/8) to (4*m/8)-1 */
/* From here the "2" indices run in the opposite direction (sub instead of add). */
l = shr(m,3); /* (4*m/8) - (3*m/8) = m/8 */
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0++, 15);
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc);
acc = L_shl(*IZ0--, 15);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2++, t_sin[s2_ind]);
acc = L_msu0(acc, *IZ2--, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = add(c1_ind, c1_step);
s1_ind = add(s1_ind, s1_step);
c2_ind = sub(c2_ind, c2_step);
s2_ind = sub(s2_ind, s2_step);
}
/* special case: i = m/2 i.e. 1/3 */
/* Only the "R" pointers are read here; afterwards they walk downwards */
/* and the "I" pointers (stepped once below) walk upwards.             */
acc = L_shl(*RZ0--, 15);
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
*RY++ = round_fx(acc);
acc = 0;
acc = L_msu0(acc, *RZ1--, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
*IY-- = round_fx(acc);
IZ0++;
IZ1++;
IZ2++;
c1_ind = add(c1_ind, c1_step);
s1_ind = add(s1_ind, s1_step);
c2_ind = sub(c2_ind, c2_step);
s2_ind = sub(s2_ind, s2_step);
/* next 2/12-- from ((m/2)+1) to (6*m/8)-1 */
l = sub(shr(m,2), 1); /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0--, 15);
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc);
/* The IZ0 term enters with a negative sign in this half (multiplied by -32768). */
acc = L_mult0(*IZ0++, -32768);
acc = L_msu0(acc, *RZ1--, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ1++, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = add(c1_ind, c1_step);
s1_ind = add(s1_ind, s1_step);
c2_ind = sub(c2_ind, c2_step);
s2_ind = sub(s2_ind, s2_step);
}
/*--------------------------half--------------------------// */
/* next 2/12-- from (6*m/8) to (8*m/8) - 1 */
/* All four indices reverse their direction relative to the previous segment. */
l = shr(m,2);
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0--, 15);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc);
acc = L_mult0(*IZ0++, -32768);
acc = L_msu0(acc, *RZ1--, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ1++, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2--, t_sin[s2_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = sub(c1_ind, c1_step);
s1_ind = sub(s1_ind, s1_step);
c2_ind = add(c2_ind, c2_step);
s2_ind = add(s2_ind, s2_step);
}
/* special case: i = m, i.e 2/3 */
/* Only the "R" pointers are read; afterwards they walk upwards again */
/* and the "I" pointers (stepped once below) walk downwards.          */
acc = L_shl(*RZ0++, 15);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
*RY++ = round_fx(acc);
acc = L_deposit_l(0);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_mac0(acc, *RZ2++, t_sin[s2_ind]);
*IY-- = round_fx(acc);
IZ0--; /* Just decrement the address counter */
IZ1--;
IZ2--;
c1_ind = sub(c1_ind, c1_step);
s1_ind = sub(s1_ind, s1_step);
c2_ind = add(c2_ind, c2_step);
s2_ind = add(s2_ind, s2_step);
/* next 1/12-- from (m + 1) to (9*m/8) - 1 */
l = sub(shr(m, 3), 1); /* (9*m/8) - (m +1) = m/8 - 1 */
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0++, 15);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc);
acc = L_shl(*IZ0--, 15);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2++, t_sin[s2_ind]);
acc = L_msu0(acc, *IZ2--, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = sub(c1_ind, c1_step);
s1_ind = sub(s1_ind, s1_step);
c2_ind = add(c2_ind, c2_step);
s2_ind = add(s2_ind, s2_step);
}
/* last 3/12-- from (9*m/8) to (12*m/8) - 1 */
l = shr(n,3); /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
FOR (i = 0; i < l; i++)
{
acc = L_shl(*RZ0++, 15);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]); /* Non-fractional mode gains 1 more bit headroom. */
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY++ = round_fx(acc);
acc = L_shl(*IZ0--, 15);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2++, t_sin[s2_ind]);
acc = L_mac0(acc, *IZ2--, t_sin[c2_ind]);
*IY-- = round_fx(acc);
c1_ind = sub(c1_ind, c1_step);
s1_ind = sub(s1_ind, s1_step);
c2_ind = sub(c2_ind, c2_step);
s2_ind = sub(s2_ind, s2_step);
}
/* special case: i = 3*m/2 */
/* Final "R" output only; no "I" value is written for this bin. */
acc = L_shl(*RZ0, 15);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
*RY = round_fx(acc);
return;
}
/*-----------------------------------------------------------------*
* ifft3_fx()
*
* Inverse counterpart of the length n = 3*m transform.
*   1. Three length-m spectra Y0, Y1, Y2 are built from Z[] by
*      inverse order-3 butterflies (table sincos_t_rad3_fx) and
*      stored back to back in the local buffer Y.
*   2. r_fft_fx_lc() is run in place on each of them with its last
*      argument set to 0 (inverse).
*   3. The three results are interleaved into X[] (X[3i] from Y0,
*      X[3i+1] from Y1, X[3i+2] from Y2) and scaled.
*
* Z : i : input; "R" values are read starting at Z[0] and Z[m],
*         "I" values are read relative to Z[n] (one past the end;
*         always stepped before being dereferenced)
* X : o : output, 3*m values are written
* n : i : transform length; 1536 selects m = 512, every other value
*         (384 and the default branch alike) selects m = 128
*
* NOTE(review): Y has PH_ECU_SPEC_SIZE entries and 3*m of them are
* used - presumably PH_ECU_SPEC_SIZE >= 1536; confirm.
*-----------------------------------------------------------------*/
void ifft3_fx(const Word16 Z[], Word16 X[], const Word16 n)
{
Word16 Y[PH_ECU_SPEC_SIZE];
const Word16 *t_sin = sincos_t_rad3_fx;
Word16 m, mMinus1, step, step2;
Word16 i, l;
Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind;
const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20;
Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX;
Word32 acc;
Word16 mBy2, orderMinus1, nMinusMBy2;
const Word16 *pPhaseTbl;
/* Determine the order of the transform, the length of decimated */
/* transforms m, and the step for the sine and cosine tables. */
SWITCH(n)
{
case 1536:
orderMinus1 = 9-1;
move16();
m = 512;
move16();
step = 1;
move16();
pPhaseTbl = FFT_W256;
BREAK;
case 384:
orderMinus1 = 7-1;
move16();
m = 128;
move16();
step = 4;
move16();
pPhaseTbl = FFT_W64;
BREAK;
default:
/* Any other length falls back to the same setup as n = 384. */
orderMinus1 = 7-1;
move16();
m = 128;
move16();
step = 4;
move16();
pPhaseTbl = FFT_W64;
BREAK;
}
/* (n - m)/2; for the two explicit lengths n = 3*m, so this equals m. */
nMinusMBy2 = shr(sub(n, m),1);
mMinus1 = sub(m,1);
/* pointer initialization */
/* Each output block gets an "R" pointer that is written upwards from the */
/* block start and an "I" pointer that is written downwards from its end. */
RY0 = &Y[0];
IY0 = &Y[m];
RY1 = &RY0[m];
IY1 = &RY1[mMinus1];
RY2 = &RY1[m];
IY2 = &RY2[mMinus1];
RZ00 = &Z[0]; /* The zero positions of the pointers */
RZ10 = &RZ00[m];
RZ20 = &RZ00[nMinusMBy2];
IZ00 = &Z[n];
IZ10 = &IZ00[-m];
IZ20 = &IZ00[-nMinusMBy2];
RZ0 = RZ00; /* Reset the pointers to zero positions. */
RZ1 = RZ10;
RZ2 = RZ20;
IZ0 = IZ00;
IZ1 = IZ10;
IZ2 = IZ20;
/* Inverse butterflies of order 3. */
/* Construction of Y0 */
/* Y0 needs no table: each output is a sum of three inputs weighted by   */
/* 0x4000.  RZ0/RZ1 and IZ2 are stepped upwards, RZ2 and IZ0/IZ1         */
/* downwards; the same directions are used for Y1 and Y2 below.          */
acc = L_mult(*RZ0++, 0x4000);
acc = L_mac(acc, *RZ1++, 0x4000);
*RY0++ = mac_r(acc, *RZ2--, 0x4000);
move16();
IZ0--;
IZ1--;
IZ2++;
IY0--;
l = sub(shr(m, 1), 1);
FOR (i = 0; i < l; i++)
{
acc = L_mult(*RZ0++, 0x4000);
acc = L_mac(acc, *RZ1++, 0x4000);
*RY0++ = mac_r(acc, *RZ2--, 0x4000);
move16();
acc = L_mult(*IZ0--, 0x4000);
acc = L_mac(acc, *IZ1--, 0x4000);
*IY0-- = msu_r(acc, *IZ2++, 0x4000);
move16();
}
/* m/2 */
acc = L_mult(*RZ0, 0x4000);
acc = L_mac(acc, *RZ1, 0x4000);
*RY0++ = mac_r(acc, *RZ2, 0x4000);
move16();
/* Construction of Y1 */
/* Starting table indices for the three inputs; they are advanced by */
/* +/- step per bin, with directions and mac/msu signs changing from */
/* segment to segment.                                               */
c0_ind=T_SIN_PI_2;
s0_ind=0;
c1_ind=T_SIN_PI_2*1/3;
s1_ind=T_SIN_PI_2*2/3;
c2_ind=T_SIN_PI_2*1/3;
s2_ind=T_SIN_PI_2*2/3;
RZ0 = RZ00; /* Reset pointers to zero positions. */
RZ1 = RZ10;
RZ2 = RZ20;
IZ0 = IZ00;
IZ1 = IZ10;
IZ2 = IZ20;
/* Bin 0: IZ0 is only stepped, not read; no "I" output is written. */
acc = L_mult0(*RZ0++, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[c2_ind]);
IZ0--;
acc = L_msu0(acc, *IZ1--, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2++, t_sin[s2_ind]);
*RY1++ = round_fx(acc);
c0_ind=sub(c0_ind,step);
s0_ind=add(s0_ind,step);
c1_ind=add(c1_ind,step);
s1_ind=sub(s1_ind,step);
c2_ind=sub(c2_ind,step);
s2_ind=add(s2_ind,step);
/* From 1 to (m/4) - 1. */
l = sub(shr(m,2),1);
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY1++ = round_fx(acc);
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_mac0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
*IY1-- = round_fx(acc);
c0_ind=sub(c0_ind,step);
s0_ind=add(s0_ind,step);
c1_ind=add(c1_ind,step);
s1_ind=sub(s1_ind,step);
c2_ind=sub(c2_ind,step);
s2_ind=add(s2_ind,step);
}
/* From m/4 to m/2 -1. */
/* The c2/s2 indices reverse direction and the c2 products change sign. */
l = shr(m, 2); /* m/2 - m/4 = m/4 */
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY1++ = round_fx(acc);
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_msu0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_mac0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
*IY1-- = round_fx(acc);
c0_ind=sub(c0_ind,step);
s0_ind=add(s0_ind,step);
c1_ind=add(c1_ind,step);
s1_ind=sub(s1_ind,step);
c2_ind=add(c2_ind,step);
s2_ind=sub(s2_ind,step);
}
/* m/2 */
/* Last "R" output of Y1; no "I" value is written for this bin. */
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY1++ = round_fx(acc);
/* Construction of Y2 */
/* Same starting indices as for Y1, but advanced by 2*step per bin. */
c0_ind=T_SIN_PI_2;
s0_ind=0;
c1_ind=T_SIN_PI_2*1/3;
s1_ind=T_SIN_PI_2*2/3;
c2_ind=T_SIN_PI_2*1/3;
s2_ind=T_SIN_PI_2*2/3;
step2 = shl(step,1);
RZ0 = RZ00; /* Reset pointers to zero positions. */
RZ1 = RZ10;
RZ2 = RZ20;
IZ0 = IZ00;
IZ1 = IZ10;
IZ2 = IZ20;
/* Bin 0: IZ0 is only stepped, not read; no "I" output is written. */
acc = L_mult0(*RZ0++, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[c2_ind]);
IZ0--;
acc = L_mac0(acc, *IZ1--, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
c0_ind=sub(c0_ind,step2);
s0_ind=add(s0_ind,step2);
c1_ind=sub(c1_ind,step2);
s1_ind=add(s1_ind,step2);
c2_ind=add(c2_ind,step2);
s2_ind=sub(s2_ind,step2);
/* From 1 to (m/8) - 1. */
l = sub(shr(m, 3),1); /* m/8 - 1. */
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_mac0(acc, *RZ2--, t_sin[s2_ind]);
*IY2-- = round_fx(acc);
c0_ind=sub(c0_ind,step2);
s0_ind=add(s0_ind,step2);
c1_ind=sub(c1_ind,step2);
s1_ind=add(s1_ind,step2);
c2_ind=add(c2_ind,step2);
s2_ind=sub(s2_ind,step2);
}
/* From (m/8) to (m/4) - 1. */
/* The c1/s1 indices reverse direction and the c1 products change sign. */
l = shr(m, 3); /* m/4 - m/8 = m/8 */
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_mac0(acc, *RZ2--, t_sin[s2_ind]);
*IY2-- = round_fx(acc);
c0_ind=sub(c0_ind,step2);
s0_ind=add(s0_ind,step2);
c1_ind=add(c1_ind,step2);
s1_ind=sub(s1_ind,step2);
c2_ind=add(c2_ind,step2);
s2_ind=sub(s2_ind,step2);
}
/* From m/4 to 3*m/8 - 1. */
/* The c2/s2 indices reverse direction and the s2 products change sign. */
l = shr(m, 3); /* 3*m/8 - m/4 = m/8 */
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ0, t_sin[c0_ind]);
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
*IY2-- = round_fx(acc);
c0_ind=sub(c0_ind,step2);
s0_ind=add(s0_ind,step2);
c1_ind=add(c1_ind,step2);
s1_ind=sub(s1_ind,step2);
c2_ind=sub(c2_ind,step2);
s2_ind=add(s2_ind,step2);
}
/* From 3*m/8 to m/2 - 1*/
/* The c0/s0 indices reverse direction and the c0 products change sign; */
/* the accumulation therefore starts from the RZ1/IZ1 product.          */
l = shr(m, 3); /* m/2 - 3*m/8 = m/8 */
FOR (i = 0; i < l; i++)
{
acc = L_mult0(*RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
acc = L_mult0(*IZ1--, t_sin[c1_ind]);
acc = L_msu0(acc, *IZ0--, t_sin[c0_ind]);
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
*IY2-- = round_fx(acc);
c0_ind=add(c0_ind,step2);
s0_ind=sub(s0_ind,step2);
c1_ind=add(c1_ind,step2);
s1_ind=sub(s1_ind,step2);
c2_ind=sub(c2_ind,step2);
s2_ind=add(s2_ind,step2);
}
/* m/2 */
/* Last "R" output of Y2; no "I" value is written for this bin. */
acc = L_mult0(*RZ1, t_sin[c1_ind]);
acc = L_msu0(acc, *RZ0, t_sin[c0_ind]);
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
*RY2++ = round_fx(acc);
/* Compute the inverse FFT for all 3 blocks. */
RY0 = &Y[0]; /* Rewind the pointers. */
RY1 = &Y[m];
RY2 = &RY1[m];
mBy2 = shr(m,1);
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0); /* inverse FFT */
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0); /* inverse FFT */
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0); /* inverse FFT */
y0 = RY0;
y1 = RY1;
y2 = RY2;
/* Interlacing and scaling, scale = 1/3 */
/* Each sample is multiplied by FFT3_ONE_THIRD with mult_r and then   */
/* shifted left by one bit.                                           */
/* NOTE(review): how this combines with the scaling of the stages     */
/* above to give an overall 1/3 is not derivable from this function   */
/* alone - confirm before changing either constant or shift.          */
pX = X;
FOR (i = 0; i < m; i++)
{
*pX++ = shl(mult_r(*y0++, FFT3_ONE_THIRD), 1);
move16();
*pX++ = shl(mult_r(*y1++, FFT3_ONE_THIRD), 1);
move16();
*pX++ = shl(mult_r(*y2++, FFT3_ONE_THIRD), 1);
move16();
}
return;
}