2833 lines
80 KiB
C++
Executable File
2833 lines
80 KiB
C++
Executable File
/*====================================================================================
|
|
EVS Codec 3GPP TS26.442 Apr 03, 2018. Version 12.11.0 / 13.6.0 / 14.2.0
|
|
====================================================================================*/
|
|
|
|
#include "options.h" /* Compilation switches */
|
|
#include "cnst_fx.h" /* Common constants */
|
|
#include "prot_fx.h" /* Function prototypes */
|
|
#include "rom_com_fx.h" /* Static table prototypes */
|
|
#include "stl.h"
|
|
#include <assert.h>
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* Local functions
|
|
*-----------------------------------------------------------------*/
|
|
|
|
#define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */
|
|
|
|
static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
|
|
static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
|
|
static void fft32_5_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
|
|
static void cftmdl_16fx(Word16 n, Word16 l, Word16 *a, const Word32 *w);
|
|
static void cftfsub_16fx( Word16 n, Word16 *a, const Word32 *w );
|
|
static void cft1st_16fx(Word16 n, Word16 *a, const Word32 *w);
|
|
static void cftmdl_16fx(Word16 n, Word16 l, Word16 *a, const Word32 *w);
|
|
static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx );
|
|
static void bitrv2_SR_16fx( Word16 n, const Word16 *ip, Word16 *a );
|
|
static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx );
|
|
static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx );
|
|
static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w );
|
|
|
|
#include "math_32.h"
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* Local functions
|
|
*-----------------------------------------------------------------*/
|
|
/* Forward declarations of the file-local 32-bit data / 16-bit twiddle helpers. */
static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w );
static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w );
static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w );
static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a );
static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w );
|
|
|
|
|
|
void DoRTFTn_fx(
|
|
Word32 *x, /* i/o : real part of input and output data */
|
|
Word32 *y, /* i/o : imaginary part of input and output data */
|
|
const Word16 n /* i : size of the FFT up to 1024 */
|
|
)
|
|
{
|
|
|
|
Word16 i;
|
|
Word32 z[2048], *pt;
|
|
|
|
pt = z;
|
|
FOR ( i=0; i<n; i++ )
|
|
{
|
|
*pt++ = x[i];
|
|
move16();
|
|
*pt++ = y[i];
|
|
move16();
|
|
}
|
|
|
|
IF (sub(n, 16) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft16_fx,w_fft16_fx);
|
|
}
|
|
ELSE IF (sub(n, 32) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft32_fx,w_fft32_fx);
|
|
}
|
|
ELSE IF (sub(n, 64) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft64_fx,w_fft64_fx);
|
|
}
|
|
ELSE IF (sub(n, 128) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft128_fx,w_fft128_fx);
|
|
}
|
|
ELSE IF (sub(n, 256) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft256_fx,w_fft256_fx);
|
|
}
|
|
ELSE IF (sub(n, 512) == 0)
|
|
{
|
|
cdftForw_fx(2*n,z,Ip_fft512_fx,w_fft512_fx);
|
|
}
|
|
ELSE
|
|
{
|
|
assert(0);
|
|
}
|
|
|
|
x[0]=z[0];
|
|
move16();
|
|
y[0]=z[1];
|
|
move16();
|
|
pt = &z[2];
|
|
FOR( i=n-1; i>=1 ; i--)
|
|
{
|
|
x[i]=*pt++;
|
|
move16();
|
|
y[i]=*pt++;
|
|
move16();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cdftForw_fx()
|
|
* Main fuction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cdftForw_fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word32 *a, /* i/o : input/output data Q(q)*/
|
|
const Word16 *ip, /* i : work area for bit reversal */
|
|
const Word16 *w /* i : cos/sin table Q14*/
|
|
)
|
|
{
|
|
/* bit reversal */
|
|
bitrv2_SR_fx(n, ip + 2, a);
|
|
|
|
/* Do FFT */
|
|
cftfsub_fx(n, a, w);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* bitrv2_SR_fx()
|
|
* Bit reversal
|
|
*-----------------------------------------------------------------*/
|
|
static void bitrv2_SR_fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
const Word16 *ip, /* i/o : work area for bit reversal */
|
|
Word32 *a /* i/o : input/output data Q(q)*/
|
|
)
|
|
{
|
|
Word16 j, j1, k, k1, m, m2;
|
|
Word16 l;
|
|
Word32 xr, xi, yr, yi;
|
|
|
|
l = n;
|
|
move16();
|
|
m = 1;
|
|
move16();
|
|
|
|
WHILE (shl(m, 3) < l)
|
|
{
|
|
l = shr(l, 1);
|
|
m = shl(m, 1);
|
|
}
|
|
|
|
m2 = shl(m, 1);
|
|
IF (shl(m, 3) == l)
|
|
{
|
|
FOR (k = 0; k < m; k++)
|
|
{
|
|
FOR (j = 0; j < k; j++)
|
|
{
|
|
j1 = add(shl(j, 1), ip[k]);
|
|
k1 = add(shl(k, 1), ip[j]);
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
j1 = add(j1, m2);
|
|
k1 = add(k1, shl(m2, 1));
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
j1 = add(j1, m2);
|
|
k1 = sub(k1, m2);
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
j1 = add(j1, m2);
|
|
k1 = add(k1, shl(m2, 1));
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
}
|
|
|
|
j1 = add(add(shl(k, 1), m2), ip[k]);
|
|
k1 = add(j1, m2);
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
}
|
|
}
|
|
ELSE
|
|
{
|
|
FOR (k = 1; k < m; k++)
|
|
{
|
|
FOR (j = 0; j < k; j++)
|
|
{
|
|
j1 = add(shl(j, 1), ip[k]);
|
|
k1 = add(shl(k, 1), ip[j]);
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
j1 = add(j1, m2);
|
|
k1 = add(k1, m2);
|
|
xr = L_add(0,a[j1]);
|
|
xi = L_add(0,a[j1 + 1]);
|
|
yr = L_add(0,a[k1]);
|
|
yi = L_add(0,a[k1 + 1]);
|
|
a[j1] = yr;
|
|
move32();
|
|
a[j1 + 1] = yi;
|
|
move32();
|
|
a[k1] = xr;
|
|
move32();
|
|
a[k1 + 1] = xi;
|
|
move32();
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cftfsub_fx()
|
|
* Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cftfsub_fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word32 *a, /* i/o : input/output data Q(q)*/
|
|
const Word16 *w /* i : cos/sin table Q14*/
|
|
)
|
|
{
|
|
Word16 j, j1, j2, j3, l;
|
|
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
|
|
l = 2;
|
|
move16();
|
|
|
|
IF (n > 8)
|
|
{
|
|
cft1st_fx(n, a, w);
|
|
l = 8;
|
|
move16();
|
|
WHILE ((shl(l, 2) < n))
|
|
{
|
|
cftmdl_fx(n, l, a, w);
|
|
l = shl(l, 2);
|
|
}
|
|
}
|
|
IF (shl(l, 2) == n)
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_add(a[j + 1], a[j1 + 1]);
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j + 1], a[j1 + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j2] = L_sub(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
a[j2 + 1] = L_sub(x0i, x2i);
|
|
move32();
|
|
a[j1] = L_sub(x1r, x3i);
|
|
move32();
|
|
a[j1 + 1] = L_add(x1i, x3r);
|
|
move32();
|
|
a[j3] = L_add(x1r, x3i);
|
|
move32();
|
|
a[j3 + 1] = L_sub(x1i, x3r);
|
|
move32();
|
|
}
|
|
}
|
|
ELSE
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
x0r = L_sub(a[j], a[j1]);
|
|
x0i = L_sub(a[j + 1], a[j1 + 1]);
|
|
a[j] = L_add(a[j], a[j1]);
|
|
move32();
|
|
a[j + 1] = L_add(a[j + 1], a[j1 + 1]);
|
|
move32();
|
|
a[j1] = x0r;
|
|
move32();
|
|
move32();
|
|
a[j1 + 1] = x0i;
|
|
move32();
|
|
move32();
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cft1st_fx()
|
|
* Subfunction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cft1st_fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word32 *a, /* i/o : input/output data Q(q)*/
|
|
const Word16 *w /* i : cos/sin table Q14*/
|
|
)
|
|
{
|
|
Word16 j, k1, k2;
|
|
Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
|
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
|
|
x0r = L_add(a[0], a[2]);
|
|
x0i = L_add(a[1], a[3]);
|
|
x1r = L_sub(a[0], a[2]);
|
|
x1i = L_sub(a[1], a[3]);
|
|
x2r = L_add(a[4], a[6]);
|
|
x2i = L_add(a[5], a[7]);
|
|
x3r = L_sub(a[4], a[6]);
|
|
x3i = L_sub(a[5], a[7]);
|
|
a[0] = L_add(x0r, x2r);
|
|
move32();
|
|
a[1] = L_add(x0i, x2i);
|
|
move32();
|
|
a[4] = L_sub(x0r, x2r);
|
|
move32();
|
|
a[5] = L_sub(x0i, x2i);
|
|
move32();
|
|
a[2] = L_sub(x1r, x3i);
|
|
move32();
|
|
a[3] = L_add(x1i, x3r);
|
|
move32();
|
|
a[6] = L_add(x1r, x3i);
|
|
move32();
|
|
a[7] = L_sub(x1i, x3r);
|
|
move32();
|
|
|
|
wk1r = w[2];
|
|
move16();
|
|
x0r = L_add(a[8], a[10]);
|
|
x0i = L_add(a[9], a[11]);
|
|
x1r = L_sub(a[8], a[10]);
|
|
x1i = L_sub(a[9], a[11]);
|
|
x2r = L_add(a[12], a[14]);
|
|
x2i = L_add(a[13], a[15]);
|
|
x3r = L_sub(a[12], a[14]);
|
|
x3i = L_sub(a[13], a[15]);
|
|
a[8] = L_add(x0r, x2r);
|
|
move32();
|
|
a[9] = L_add(x0i, x2i);
|
|
move32();
|
|
a[12] = L_sub(x2i, x0i);
|
|
move32();
|
|
a[13] = L_sub(x0r, x2r);
|
|
move32();
|
|
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i ,x3r);
|
|
a[10] = Mult_32_16(L_shl(L_sub(x0r, x0i), 1), wk1r);
|
|
move32();
|
|
a[11] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
|
|
move32();
|
|
x0r = L_add(x3i, x1r);
|
|
x0i = L_sub(x3r,x1i);
|
|
a[14] = Mult_32_16(L_shl(L_sub(x0i, x0r), 1), wk1r);
|
|
move32();
|
|
a[15] = Mult_32_16(L_shl(L_add(x0i, x0r), 1), wk1r);
|
|
move32();
|
|
|
|
k1 = 0;
|
|
move16();
|
|
FOR (j = 16; j < n; j += 16)
|
|
{
|
|
k1 = add(k1, 2);
|
|
k2 = shl(k1, 1);
|
|
wk2r = w[k1];
|
|
move16();
|
|
wk2i = w[k1 + 1];
|
|
move16();
|
|
wk1r = w[k2];
|
|
move16();
|
|
wk1i = w[k2 + 1];
|
|
move16();
|
|
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2i, wk1i), 14)));
|
|
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2i, wk1r), 14), wk1i, 1));
|
|
x0r = L_add(a[j], a[j + 2]);
|
|
x0i = L_add(a[j + 1], a[j + 3]);
|
|
x1r = L_sub(a[j], a[j + 2]);
|
|
x1i = L_sub(a[j + 1], a[j + 3]);
|
|
x2r = L_add(a[j + 4], a[j + 6]);
|
|
x2i = L_add(a[j + 5], a[j + 7]);
|
|
x3r = L_sub(a[j + 4], a[j + 6]);
|
|
x3i = L_sub(a[j + 5], a[j + 7]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
x0r = L_sub(x0r, x2r);
|
|
x0i = L_sub(x0i, x2i);
|
|
a[j + 4] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
|
|
move32();
|
|
a[j + 5] = L_add(Mult_32_16(L_shl(x0i, 1), wk2r), Mult_32_16(L_shl(x0r, 1), wk2i));
|
|
move32();
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i, x3r);
|
|
a[j + 2] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
|
|
move32();
|
|
a[j + 3] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
|
|
move32();
|
|
x0r = L_add(x1r, x3i);
|
|
x0i = L_sub(x1i, x3r);
|
|
a[j + 6] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
|
|
move32();
|
|
a[j + 7] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
|
|
move32();
|
|
|
|
wk1r = w[k2 + 2];
|
|
move16();
|
|
wk1i = w[k2 + 3];
|
|
move16();
|
|
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2r, wk1i), 14)));
|
|
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2r, wk1r), 14), wk1i, 1));
|
|
x0r = L_add(a[j + 8], a[j + 10]);
|
|
x0i = L_add(a[j + 9], a[j + 11]);
|
|
x1r = L_sub(a[j + 8], a[j + 10]);
|
|
x1i = L_sub(a[j + 9], a[j + 11]);
|
|
x2r = L_add(a[j + 12], a[j + 14]);
|
|
x2i = L_add(a[j + 13], a[j + 15]);
|
|
x3r = L_sub(a[j + 12], a[j + 14]);
|
|
x3i = L_sub(a[j + 13], a[j + 15]);
|
|
a[j + 8] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 9] = L_add(x0i, x2i);
|
|
move32();
|
|
x0r = L_sub(x0r, x2r);
|
|
x0i = L_sub(x0i, x2i);
|
|
a[j + 12] = L_negate(L_add(Mult_32_16(L_shl(x0r, 1), wk2i), Mult_32_16(L_shl(x0i, 1), wk2r)));
|
|
move32();
|
|
a[j + 13] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
|
|
move32();
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i, x3r);
|
|
a[j + 10] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
|
|
move32();
|
|
a[j + 11] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
|
|
move32();
|
|
x0r =L_add(x1r, x3i);
|
|
x0i =L_sub(x1i, x3r);
|
|
a[j + 14] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
|
|
move32();
|
|
a[j + 15] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
|
|
move32();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cftmdl_fx()
|
|
* Subfunction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cftmdl_fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word16 l, /* i : initial shift for processing */
|
|
Word32 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
const Word16 *w /* i : cos/sin table Q30*/
|
|
)
|
|
{
|
|
Word16 j, j1, j2, j3, k, k1, k2, m, m2;
|
|
Word16 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
|
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
Word16 tmp;
|
|
|
|
m = shl(l, 2);
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_add(a[j + 1], a[j1 + 1]);
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j + 1], a[j1 + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
a[j2] = L_sub(x0r, x2r);
|
|
move32();
|
|
a[j2 + 1] =L_sub(x0i, x2i);
|
|
move32();
|
|
a[j1] = L_sub(x1r, x3i);
|
|
move32();
|
|
a[j1 + 1] = L_add(x1i, x3r);
|
|
move32();
|
|
a[j3] = L_add(x1r, x3i);
|
|
move32();
|
|
a[j3 + 1] = L_sub(x1i, x3r);
|
|
move32();
|
|
}
|
|
|
|
wk1r = w[2];
|
|
move16();
|
|
tmp = add(l, m);
|
|
FOR (j = m; j < tmp; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_add(a[j + 1], a[j1 + 1]);
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j + 1], a[j1 + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
a[j2] = L_sub(x2i, x0i);
|
|
move32();
|
|
a[j2 + 1] = L_sub(x0r, x2r);
|
|
move32();
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i, x3r);
|
|
a[j1] = Mult_32_16(L_shl(L_sub(x0r, x0i), 1), wk1r);
|
|
move32();
|
|
a[j1 + 1] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
|
|
move32();
|
|
x0r = L_add(x3i, x1r);
|
|
x0i = L_sub(x3r, x1i);
|
|
a[j3] = Mult_32_16(L_shl(L_sub(x0i, x0r), 1), wk1r);
|
|
move32();
|
|
a[j3 + 1] = Mult_32_16(L_shl(L_add(x0r, x0i), 1), wk1r);
|
|
move32();
|
|
}
|
|
|
|
k1 = 0;
|
|
move16();
|
|
m2 = shl(m, 1);
|
|
FOR (k = m2; k < n; k += m2)
|
|
{
|
|
k1 = add(k1, 2);
|
|
k2 = shl(k1, 1);
|
|
wk2r = w[k1];
|
|
move16();
|
|
wk2i = w[k1 + 1];
|
|
move16();
|
|
wk1r = w[k2];
|
|
move16();
|
|
wk1i = w[k2 + 1];
|
|
move16();
|
|
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2i, wk1i), 14)));
|
|
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2i, wk1r), 14), wk1i, 1));
|
|
|
|
tmp = add(l, k) ;
|
|
FOR (j = k; j < tmp; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_add(a[j + 1], a[j1 + 1]);
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j + 1], a[j1 + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
x0r = L_sub(x0r, x2r);
|
|
x0i = L_sub(x0i, x2i);
|
|
a[j2] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
|
|
move32();
|
|
a[j2 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk2r), Mult_32_16(L_shl(x0r, 1), wk2i));
|
|
move32();
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i, x3r);
|
|
a[j1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
|
|
move32();
|
|
a[j1 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
|
|
move32();
|
|
x0r = L_add(x1r, x3i);
|
|
x0i = L_sub(x1i, x3r);
|
|
a[j3] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
|
|
move32();
|
|
a[j3 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
|
|
move32();
|
|
}
|
|
|
|
wk1r = w[k2 + 2];
|
|
move16();
|
|
wk1i = w[k2 + 3];
|
|
move16();
|
|
wk3r = extract_l(L_sub(L_deposit_l(wk1r), L_shr(L_mult(wk2r, wk1i), 14)));
|
|
wk3i = extract_l(L_msu0(L_shr(L_mult(wk2r, wk1r), 14), wk1i, 1));
|
|
|
|
tmp = add(l, add(k, m));
|
|
FOR (j = add(k, m); j < tmp; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_add(a[j + 1], a[j1 + 1]);
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j + 1], a[j1 + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
x0r= L_sub(x0r, x2r);
|
|
x0i=L_sub(x0i, x2i);
|
|
a[j2] = L_negate(L_add(Mult_32_16(L_shl(x0r, 1), wk2i), Mult_32_16(L_shl(x0i, 1), wk2r)));
|
|
move32();
|
|
a[j2 + 1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk2r), Mult_32_16(L_shl(x0i, 1), wk2i));
|
|
move32();
|
|
x0r = L_sub(x1r, x3i);
|
|
x0i = L_add(x1i, x3r);
|
|
a[j1] = L_sub(Mult_32_16(L_shl(x0r, 1), wk1r), Mult_32_16(L_shl(x0i, 1), wk1i));
|
|
move32();
|
|
a[j1 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk1r), Mult_32_16(L_shl(x0r, 1), wk1i));
|
|
move32();
|
|
x0r = L_add(x1r, x3i);
|
|
x0i = L_sub(x1i, x3r);
|
|
a[j3] = L_sub(Mult_32_16(L_shl(x0r, 1), wk3r), Mult_32_16(L_shl(x0i, 1), wk3i));
|
|
move32();
|
|
a[j3 + 1] = L_add(Mult_32_16(L_shl(x0i, 1), wk3r), Mult_32_16(L_shl(x0r, 1), wk3i));
|
|
move32();
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void cftbsub_fx(
|
|
Word16 n,
|
|
Word32 *a,
|
|
const Word16 *w /* i : cos/sin table */
|
|
)
|
|
{
|
|
Word16 j, j1, j2, j3, l;
|
|
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
|
|
l = 2;
|
|
move16();
|
|
IF (n > 8)
|
|
{
|
|
cft1st_fx(n, a, w);
|
|
l = 8;
|
|
move16();
|
|
|
|
WHILE (sub(shl(l, 2), n) < 0)
|
|
{
|
|
cftmdl_fx(n, l, a, w);
|
|
l = shl(l, 2);
|
|
}
|
|
}
|
|
|
|
IF (sub(shl(l, 2), n) == 0)
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
j2 = add(j1, l);
|
|
j3 = add(j2, l);
|
|
x0r = L_add(a[j], a[j1]);
|
|
x0i = L_negate(L_add(a[j + 1], a[j1 + 1]));
|
|
x1r = L_sub(a[j], a[j1]);
|
|
x1i = L_sub(a[j1 + 1], a[j + 1]);
|
|
x2r = L_add(a[j2], a[j3]);
|
|
x2i = L_add(a[j2 + 1], a[j3 + 1]);
|
|
x3r = L_sub(a[j2], a[j3]);
|
|
x3i = L_sub(a[j2 + 1], a[j3 + 1]);
|
|
a[j] = L_add(x0r, x2r);
|
|
move32();
|
|
a[j + 1] = L_sub(x0i, x2i);
|
|
move32();
|
|
a[j2] = L_sub(x0r, x2r);
|
|
move32();
|
|
a[j2 + 1] = L_add(x0i, x2i);
|
|
move32();
|
|
a[j1] = L_sub(x1r, x3i);
|
|
move32();
|
|
a[j1 + 1] = L_sub(x1i, x3r);
|
|
move32();
|
|
a[j3] = L_add(x1r, x3i);
|
|
move32();
|
|
a[j3 + 1] = L_add(x1i, x3r);
|
|
move32();
|
|
}
|
|
}
|
|
ELSE
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j, l);
|
|
x0r = L_sub(a[j], a[j1]);
|
|
x0i = L_sub(a[j1 + 1], a[j + 1]);
|
|
a[j] = L_add(a[j], a[j1]);
|
|
move32();
|
|
a[j + 1] = L_negate(L_add(a[j + 1], a[j1 + 1]));
|
|
move32();
|
|
a[j1] = x0r;
|
|
move32();
|
|
move32();
|
|
a[j1 + 1] = x0i;
|
|
move32();
|
|
move32();
|
|
}
|
|
}
|
|
}
|
|
|
|
static void rftfsub_fx(
|
|
Word16 n,
|
|
Word32 *a,
|
|
Word16 nc,
|
|
const Word16 *c
|
|
)
|
|
{
|
|
Word16 j, k, kk, ks, m, tmp;
|
|
Word32 xr, xi, yr, yi;
|
|
Word16 wkr, wki;
|
|
|
|
m = shr(n, 1);
|
|
/*ks = 2 * nc / m; */
|
|
tmp = shl(nc, 1);
|
|
ks = 0;
|
|
move16();
|
|
WHILE (sub(tmp, m) >= 0)
|
|
{
|
|
ks = add(ks, 1);
|
|
tmp = sub(tmp, m);
|
|
}
|
|
kk = 0;
|
|
move16();
|
|
FOR (j = 2; j < m; j += 2)
|
|
{
|
|
k = sub(n, j);
|
|
kk = add(kk, ks);
|
|
wkr = sub(8192, c[nc - kk]);
|
|
wki = c[kk];
|
|
move16();
|
|
xr = L_sub(a[j], a[k]);
|
|
xi = L_add(a[j + 1], a[k + 1]);
|
|
yr = L_sub(Mult_32_16(L_shl(xr, 1), wkr), Mult_32_16(L_shl(xi, 1), wki));
|
|
yi = L_add(Mult_32_16(L_shl(xi, 1), wkr), Mult_32_16(L_shl(xr, 1), wki));
|
|
a[j] = L_sub(a[j], yr);
|
|
move32();
|
|
a[j + 1] = L_sub(a[j + 1], yi);
|
|
move32();
|
|
a[k] = L_add(a[k], yr);
|
|
move32();
|
|
a[k + 1] = L_sub(a[k + 1], yi);
|
|
move32();
|
|
}
|
|
}
|
|
|
|
|
|
static void rftbsub_fx(
|
|
Word16 n,
|
|
Word32 *a,
|
|
Word16 nc,
|
|
const Word16 *c
|
|
)
|
|
{
|
|
Word16 j, k, kk, ks, m, tmp;
|
|
Word32 xr, xi, yr, yi;
|
|
Word16 wkr, wki;
|
|
|
|
a[1] = L_negate(a[1]);
|
|
m = shr(n, 1);
|
|
/*ks = 2 * nc / m; */
|
|
tmp = shl(nc, 1);
|
|
ks = 0;
|
|
move16();
|
|
WHILE (sub(tmp, m) >= 0)
|
|
{
|
|
ks = add(ks, 1);
|
|
tmp = sub(tmp, m);
|
|
}
|
|
kk = 0;
|
|
move16();
|
|
FOR (j = 2; j < m; j += 2)
|
|
{
|
|
k = sub(n, j);
|
|
kk = add(kk, ks);
|
|
wkr = sub(8192, c[nc - kk]);
|
|
wki = c[kk];
|
|
move16();
|
|
xr = L_sub(a[j], a[k]);
|
|
xi = L_add(a[j + 1], a[k + 1]);
|
|
yr = L_add(Mult_32_16(L_shl(xr, 1), wkr), Mult_32_16(L_shl(xi, 1), wki));
|
|
yi = L_sub(Mult_32_16(L_shl(xi, 1), wkr), Mult_32_16(L_shl(xr, 1), wki));
|
|
a[j] = L_sub(a[j], yr);
|
|
move32();
|
|
a[j + 1] = L_sub(yi, a[j + 1]);
|
|
move32();
|
|
a[k] = L_add(a[k], yr);
|
|
move32();
|
|
a[k + 1] = L_sub(yi, a[k + 1]);
|
|
move32();
|
|
}
|
|
a[m + 1] = L_negate(a[m + 1]);
|
|
move32();
|
|
}
|
|
|
|
|
|
static void dctsub_fx(
|
|
Word16 n,
|
|
Word32 *a,
|
|
Word16 nc,
|
|
const Word16 *c
|
|
)
|
|
{
|
|
Word16 j, k, kk, ks, m, tmp;
|
|
Word16 wkr, wki;
|
|
Word32 xr;
|
|
|
|
m = shr(n, 1);
|
|
/*ks = nc / n; */
|
|
tmp = nc;
|
|
move16();
|
|
ks = 0;
|
|
move16();
|
|
WHILE (sub(tmp, n) >= 0)
|
|
{
|
|
ks = add(ks, 1);
|
|
tmp = sub(tmp, n);
|
|
}
|
|
kk = 0;
|
|
move16();
|
|
FOR (j = 1; j < m; j++)
|
|
{
|
|
k = sub(n, j);
|
|
kk = add(kk, ks);
|
|
wkr = sub(c[kk], c[nc - kk]);
|
|
wki = add(c[kk], c[nc - kk]);
|
|
xr = L_sub(Mult_32_16(L_shl(a[j], 1), wki), Mult_32_16(L_shl(a[k], 1), wkr));
|
|
a[j] = L_add(Mult_32_16(L_shl(a[j], 1), wkr), Mult_32_16(L_shl(a[k], 1), wki));
|
|
move32();
|
|
a[k] = xr;
|
|
move32();
|
|
}
|
|
a[m] = Mult_32_16(L_shl(a[m], 1), c[0]);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* edct2_fx()
|
|
*
|
|
* Transformation of the signal to DCT domain
|
|
* OR Inverse EDCT-II for short frames
|
|
*-----------------------------------------------------------------*/
|
|
|
|
void edct2_fx(
|
|
Word16 n,
|
|
Word16 isgn,
|
|
Word16 *in,
|
|
Word32 *a,
|
|
Word16 *q,
|
|
const Word16 *ip,
|
|
const Word16 *w
|
|
)
|
|
{
|
|
Word16 j, nw, nc;
|
|
Word32 xr;
|
|
|
|
*q = Exp16Array(n, in);
|
|
*q = add(*q, 6);
|
|
FOR (j = 0; j < n; j++)
|
|
{
|
|
a[j] = L_shl((Word32) in[j], *q);
|
|
move32();
|
|
}
|
|
|
|
nw = ip[0];
|
|
move16();
|
|
if (sub(n, shl(nw, 2)) > 0)
|
|
{
|
|
nw = shr(n, 2);
|
|
}
|
|
|
|
nc = ip[1];
|
|
move16();
|
|
if (n > nc)
|
|
{
|
|
nc = n;
|
|
move16();
|
|
}
|
|
|
|
IF (isgn < 0)
|
|
{
|
|
xr = a[n - 1];
|
|
move32();
|
|
FOR (j = n - 2; j >= 2; j -= 2)
|
|
{
|
|
a[j + 1] = L_sub(a[j], a[j - 1]);
|
|
move32();
|
|
a[j] = L_add(a[j], a[j - 1]);
|
|
move32();
|
|
}
|
|
a[1] = L_sub(a[0], xr);
|
|
move32();
|
|
a[0] = L_add(a[0], xr);
|
|
move32();
|
|
|
|
IF (n > 4)
|
|
{
|
|
rftbsub_fx(n, a, nc, w + nw);
|
|
bitrv2_SR_fx(n, ip + 2, a);
|
|
cftbsub_fx(n, a, w);
|
|
}
|
|
ELSE IF (n == 4)
|
|
{
|
|
cftfsub_fx(n, a, w);
|
|
}
|
|
}
|
|
|
|
IF (isgn >= 0)
|
|
{
|
|
a[0] = L_shr(a[0], 1);
|
|
move32();
|
|
}
|
|
|
|
dctsub_fx(n, a, nc, w + nw);
|
|
|
|
IF (isgn >= 0)
|
|
{
|
|
IF (n > 4)
|
|
{
|
|
bitrv2_SR_fx(n, ip + 2, a);
|
|
cftfsub_fx(n, a, w);
|
|
rftfsub_fx(n, a, nc, w + nw);
|
|
}
|
|
ELSE IF (n == 4)
|
|
{
|
|
cftfsub_fx(n, a, w);
|
|
}
|
|
xr = L_sub(a[0], a[1]);
|
|
a[0] = L_add(a[0], a[1]);
|
|
move32();
|
|
FOR (j = 2; j < n; j += 2)
|
|
{
|
|
a[j - 1] = L_sub(a[j], a[j + 1]);
|
|
move32();
|
|
a[j] = L_add(a[j], a[j + 1]);
|
|
move32();
|
|
}
|
|
a[n - 1] = xr;
|
|
move32();
|
|
|
|
FOR (j = 0; j < n; j ++)
|
|
{
|
|
a[j] = L_shr(a[j], 5);
|
|
move32();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* fft5_shift4()
|
|
* 5-point FFT with 4-point circular shift
|
|
*-----------------------------------------------------------------*/
|
|
|
|
static void fft5_shift4_16fx(
|
|
Word16 n1, /* i : length of data */
|
|
Word16 *zRe, /* i/o : real part of input and output data */
|
|
Word16 *zIm, /* i/o : imaginary part of input and output data */
|
|
const Word16 *Idx /* i : pointer of the address table */
|
|
)
|
|
{
|
|
Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn,T2, T3, T4, T5, T6, T7;
|
|
Word16 i0,i1,i2,i3,i4;
|
|
Word32 L_tmp;
|
|
|
|
i0 = Idx[0];
|
|
move16();
|
|
i1 = Idx[n1];
|
|
move16();
|
|
i2 = Idx[n1*2];
|
|
move16();
|
|
i3 = Idx[n1*3];
|
|
move16();
|
|
i4 = Idx[n1*4];
|
|
move16();
|
|
|
|
T1 = zRe[i0];
|
|
move16();
|
|
To = zIm[i0];
|
|
move16();
|
|
|
|
T2 = zRe[i1];
|
|
move16();
|
|
T3 = zRe[i4];
|
|
move16();
|
|
T4 = add(T2,T3);
|
|
T5 = zRe[i2];
|
|
move16();
|
|
T6 = zRe[i3];
|
|
move16();
|
|
T7 = add(T5,T6);
|
|
T8 = add(T4,T7);
|
|
Tt = sub(T5,T6);
|
|
/* T9 = KP559016994 * (T4 - T7); */
|
|
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
|
|
T9 = round_fx(L_tmp);
|
|
Ts = sub(T2,T3);
|
|
|
|
T2 = zIm[i1];
|
|
move16();
|
|
T3 = zIm[i4];
|
|
move16();
|
|
T4 = add(T2,T3);
|
|
T5 = zIm[i2];
|
|
move16();
|
|
T6 = zIm[i3];
|
|
move16();
|
|
T7 = add(T5,T6);
|
|
Te = sub(T2,T3);
|
|
Tp = add(T4,T7);
|
|
Th = sub(T5,T6);
|
|
/* Tn = KP559016994 * (T4 - T7); */
|
|
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
|
|
Tn = round_fx(L_tmp);
|
|
|
|
zRe[i0] = add(T1,T8);
|
|
move16();
|
|
zIm[i0] = add(To,Tp);
|
|
move16();
|
|
|
|
/* T2 = KP951056516*Te + KP587785252*Th; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Te);
|
|
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Th);
|
|
T2 = round_fx(L_tmp);
|
|
|
|
/*T3 = KP951056516*Th - KP587785252*Te; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Th);
|
|
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Te);
|
|
T3 = round_fx(L_tmp);
|
|
|
|
T6 = sub(T1,shr(T8,2));
|
|
T4 = add(T9,T6);
|
|
T5 = sub(T6,T9);
|
|
zRe[i1] = sub(T4,T2);
|
|
move16();
|
|
zRe[i2] = add(T5,T3);
|
|
move16();
|
|
zRe[i4] = add(T4,T2);
|
|
move16();
|
|
zRe[i3] = sub(T5,T3);
|
|
move16();
|
|
|
|
/* T2 = KP951056516 * Ts + KP587785252 * Tt; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Ts);
|
|
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Tt);
|
|
T2 = round_fx(L_tmp);
|
|
|
|
/* T3 = KP951056516 * Tt - KP587785252 * Ts; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Tt);
|
|
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Ts);
|
|
T3 = round_fx(L_tmp);
|
|
|
|
T6 = sub(To,shr(Tp,2));
|
|
T4 = add(Tn,T6);
|
|
T5 = sub(T6,Tn);
|
|
zIm[i4] = sub(T4,T2);
|
|
move16();
|
|
zIm[i2] = sub(T5,T3);
|
|
move16();
|
|
zIm[i1] = add(T2,T4);
|
|
move16();
|
|
zIm[i3] = add(T3,T5);
|
|
move16();
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* fft5_32()
|
|
* 5-point FFT called for 32 times
|
|
*-----------------------------------------------------------------*/
|
|
static void fft5_32_16fx(
|
|
Word16 *zRe, /* i/o : real part of input and output data */
|
|
Word16 *zIm, /* i/o : imaginary part of input and output data */
|
|
const Word16 *Idx /* i : pointer of the address table */
|
|
)
|
|
{
|
|
Word16 T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn,T2, T3, T4, T5, T6, T7;
|
|
Word16 i0,i1,i2,i3,i4;
|
|
Word32 L_tmp;
|
|
|
|
i0 = Idx[0];
|
|
move16();
|
|
i1 = Idx[32];
|
|
move16();
|
|
i2 = Idx[64];
|
|
move16();
|
|
i3 = Idx[96];
|
|
move16();
|
|
i4 = Idx[128];
|
|
move16();
|
|
|
|
T1 = zRe[i0];
|
|
move16();
|
|
To = zIm[i0];
|
|
move16();
|
|
|
|
T2 = zRe[i1];
|
|
move16();
|
|
T3 = zRe[i4];
|
|
move16();
|
|
T4 = add(T2, T3);
|
|
T5 = zRe[i2];
|
|
move16();
|
|
T6 = zRe[i3];
|
|
move16();
|
|
T7 = add(T5,T6);
|
|
T8 = add(T4,T7);
|
|
Tt = sub(T5,T6);
|
|
/* T9 = KP559016994 * (T4 - T7); */
|
|
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
|
|
T9 = round_fx(L_tmp);
|
|
Ts = sub(T2,T3);
|
|
|
|
T2 = zIm[i1];
|
|
move16();
|
|
T3 = zIm[i4];
|
|
move16();
|
|
T4 = add(T2,T3);
|
|
T5 = zIm[i2];
|
|
move16();
|
|
T6 = zIm[i3];
|
|
move16();
|
|
T7 = add(T5,T6);
|
|
Te = sub(T2,T3);
|
|
Tp = add(T4,T7);
|
|
Th = sub(T5,T6);
|
|
L_tmp = Mult_32_16(KP559016994_16FX,sub(T4,T7));
|
|
Tn = round_fx(L_tmp);
|
|
|
|
|
|
|
|
zRe[i0] = add(T1,T8);
|
|
move16();
|
|
zIm[i0] = add(To,Tp);
|
|
move32();
|
|
|
|
|
|
|
|
/*T2 = KP951056516*Te + KP587785252*Th; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Te);
|
|
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Th);
|
|
T2 = round_fx(L_tmp);
|
|
|
|
/*T3 = KP951056516*Th - KP587785252*Te; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Th);
|
|
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Te);
|
|
T3 = round_fx(L_tmp);
|
|
|
|
|
|
|
|
T6 = sub(T1,shr(T8,2));
|
|
T4 = add(T9,T6);
|
|
T5 = sub(T6,T9);
|
|
zRe[i3] = sub(T4,T2);
|
|
move32();
|
|
zRe[i1] = add(T5,T3);
|
|
move32();
|
|
zRe[i2] = add(T4,T2);
|
|
move32();
|
|
zRe[i4] = sub(T5,T3);
|
|
move32();
|
|
|
|
/* T2 = KP951056516 * Ts + KP587785252 * Tt; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Ts);
|
|
L_tmp = Madd_32_16(L_tmp,KP587785252_16FX,Tt);
|
|
T2 = round_fx(L_tmp);
|
|
|
|
/* T3 = KP951056516 * Tt - KP587785252 * Ts; */
|
|
L_tmp = Mult_32_16(KP951056516_16FX,Tt);
|
|
L_tmp = Msub_32_16(L_tmp,KP587785252_16FX,Ts);
|
|
T3 = round_fx(L_tmp);
|
|
|
|
T6 = sub(To,shr(Tp,2));
|
|
T4 = add(Tn,T6);
|
|
T5 = sub(T6,Tn);
|
|
zIm[i2] = sub(T4,T2);
|
|
move16();
|
|
zIm[i1] = sub(T5,T3);
|
|
move16();
|
|
zIm[i3] = add(T2,T4);
|
|
move16();
|
|
zIm[i4] = add(T3,T5);
|
|
move16();
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* fft64()
|
|
* 64-point FFT
|
|
*-----------------------------------------------------------------*/
|
|
static void fft64_16fx(
|
|
Word16 *x, /* i/o : real part of input and output data */
|
|
Word16 *y, /* i/o : imaginary part of input and output data */
|
|
const Word16 *Idx /* i : pointer of the address table */
|
|
)
|
|
{
|
|
Word16 i,id,jd;
|
|
Word16 z[128];
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR ( i=0; i<64; i++ )
|
|
{
|
|
id = Idx[i];
|
|
move16();
|
|
z[2*i] = x[id];
|
|
move16();
|
|
z[2*i+1] = y[id];
|
|
move16();
|
|
}
|
|
|
|
cdftForw_16fx(128,z,Ip_fft64_16fx,w_fft64_16fx);
|
|
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR( i=0; i<64 ; i++)
|
|
{
|
|
jd = Odx_fft64_16fx[i];
|
|
move16();
|
|
id = Idx[jd];
|
|
move16();
|
|
x[id]=z[2*i];
|
|
move16();
|
|
y[id]=z[2*i+1];
|
|
move16();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* fft32_5()
|
|
* 32-point FFT called for 5 times
|
|
*-----------------------------------------------------------------*/
|
|
static void fft32_5_16fx(
|
|
Word16 *x, /* i/o : real part of input and output data */
|
|
Word16 *y, /* i/o : imaginary part of input and output data */
|
|
const Word16 *Idx /* i : pointer of the address table */
|
|
)
|
|
{
|
|
Word16 i,id,jd;
|
|
Word16 z[64];
|
|
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR( i=0; i<32; i++ )
|
|
{
|
|
id = Idx[i];
|
|
move16();
|
|
z[2*i] = x[id];
|
|
move16();
|
|
z[2*i+1] = y[id];
|
|
move16();
|
|
}
|
|
|
|
cdftForw_16fx(64,z,Ip_fft32_16fx,w_fft32_16fx);
|
|
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR( i=0; i<32; i++ )
|
|
{
|
|
jd = Odx_fft32_5[i];
|
|
move16();
|
|
id = Idx[jd];
|
|
move16();
|
|
x[id]=z[2*i];
|
|
move16();
|
|
y[id]=z[2*i+1];
|
|
move16();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* DoRTFT160()
|
|
* a low complexity 2-dimensional DFT of 160 points
|
|
*-----------------------------------------------------------------*/
|
|
void DoRTFT160_16fx(
|
|
Word16 x[], /* i/o : real part of input and output data */
|
|
Word16 y[] /* i/o : imaginary part of input and output data */
|
|
)
|
|
{
|
|
Word16 j;
|
|
|
|
/* Applying 32-point FFT for 5 times based on the address table Idx_dortft160 */
|
|
FOR(j=0; j<5; j++)
|
|
{
|
|
fft32_5_16fx(x,y,Idx_dortft160+shl(j,5)/*32*j*/);
|
|
}
|
|
|
|
/* Applying 5-point FFT for 32 times based on the address table Idx_dortft160 */
|
|
FOR(j=0; j<32; j++)
|
|
{
|
|
fft5_32_16fx(x,y,Idx_dortft160+j);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* DoRTFT320()
|
|
* a low complexity 2-dimensional DFT of 320 points
|
|
*-----------------------------------------------------------------*/
|
|
void DoRTFT320_16fx(
|
|
Word16 *x, /* i/o : real part of input and output data */
|
|
Word16 *y /* i/o : imaginary part of input and output data */
|
|
)
|
|
{
|
|
Word16 j;
|
|
|
|
/* Applying 64-point FFT for 5 times based on the address table Idx_dortft160 */
|
|
FOR(j=0; j<5; j++)
|
|
{
|
|
fft64_16fx(x,y,Idx_dortft320_16fx+shl(j,6)/*64*j*/);
|
|
}
|
|
|
|
/* Applying 5-point FFT for 64 times based on the address table Idx_dortft160 */
|
|
FOR(j=0; j<64; j++)
|
|
{
|
|
fft5_shift4_16fx(64,x,y,Idx_dortft320_16fx+j);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* DoRTFT128()
|
|
* FFT with 128 points
|
|
*-----------------------------------------------------------------*/
|
|
void DoRTFT128_16fx(
|
|
Word16 *x, /* i/o : real part of input and output data Q(Qx+Q_edct)*/
|
|
Word16 *y /* i/o : imaginary part of input and output data Q(Qx+Q_edct)*/
|
|
)
|
|
{
|
|
|
|
Word16 i;
|
|
Word16 z[256];
|
|
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR ( i=0; i<128; i++ )
|
|
{
|
|
z[2*i] = x[i];
|
|
move16();
|
|
z[2*i+1] = y[i];
|
|
move16();
|
|
}
|
|
|
|
cdftForw_16fx(256,z,Ip_fft128_16fx,w_fft128_16fx);
|
|
|
|
x[0]=z[0];
|
|
move16();
|
|
y[0]=z[1];
|
|
move16();
|
|
move16();/*penalty for 1 ptr init */
|
|
move16();/*penalty for 1 ptr init */
|
|
FOR( i=1; i<128 ; i++)
|
|
{
|
|
x[128-i]=z[2*i];
|
|
move16();
|
|
y[128-i]=z[2*i+1];
|
|
move16();
|
|
}
|
|
|
|
return;
|
|
}
|
|
/*-----------------------------------------------------------------*
|
|
* cdftForw()
|
|
* Main fuction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cdftForw_16fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
const Word16 *ip, /* i : work area for bit reversal */
|
|
const Word32 *w /* i : cos/sin table Q30*/
|
|
)
|
|
{
|
|
/* bit reversal */
|
|
bitrv2_SR_16fx(n, ip + 2, a);
|
|
|
|
/* Do FFT */
|
|
cftfsub_16fx(n, a, w);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* bitrv2_SR()
|
|
* Bit reversal
|
|
*-----------------------------------------------------------------*/
|
|
static void bitrv2_SR_16fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
const Word16 *ip, /* i/o : work area for bit reversal */
|
|
Word16 *a /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
)
|
|
{
|
|
Word16 j, j1, k, k1, m, m2;
|
|
Word16 l;
|
|
Word16 xr, xi, yr, yi;
|
|
|
|
l = n;
|
|
move16();
|
|
m = 1;
|
|
move16();
|
|
|
|
WHILE (sub(shl(m,3),l) < 0)
|
|
{
|
|
l = shr(l,1);
|
|
m = shl(m,1);
|
|
}
|
|
|
|
m2 = shl(m,1);
|
|
IF (sub(shl(m, 3),l) == 0)
|
|
{
|
|
FOR (k = 0; k < m; k++)
|
|
{
|
|
FOR (j = 0; j < k; j++)
|
|
{
|
|
j1 = add(shl(j,1),ip[k]);
|
|
k1 = add(shl(k,1),ip[j]);
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
j1 = add(j1,m2);
|
|
k1 = add(k1,shl(m2,1));
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
j1 = add(j1,m2);
|
|
k1 = sub(k1,m2);
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
j1 = add(j1,m2);
|
|
k1 = add(k1,shl(m2,1));
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
}
|
|
|
|
j1 = add(add(shl(k,1),m2),ip[k]);
|
|
k1 = add(j1,m2);
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
}
|
|
}
|
|
ELSE
|
|
{
|
|
FOR (k = 1; k < m; k++)
|
|
{
|
|
FOR (j = 0; j < k; j++)
|
|
{
|
|
j1 = add(shl(j,1),ip[k]);
|
|
k1 = add(shl(k,1),ip[j]);
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
j1 = add(j1,m2);
|
|
k1 = add(k1,m2);
|
|
xr = a[j1];
|
|
move16();
|
|
xi = a[j1 + 1];
|
|
move16();
|
|
yr = a[k1];
|
|
move16();
|
|
yi = a[k1 + 1];
|
|
move16();
|
|
a[j1] = yr;
|
|
move16();
|
|
a[j1 + 1] = yi;
|
|
move16();
|
|
a[k1] = xr;
|
|
move16();
|
|
a[k1 + 1] = xi;
|
|
move16();
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cftfsub()
|
|
* Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cftfsub_16fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
const Word32 *w /* i : cos/sin table Q30*/
|
|
)
|
|
{
|
|
Word16 j, j1, j2, j3, l;
|
|
Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
|
|
l = 2;
|
|
move16();
|
|
IF (sub(n,8) > 0)
|
|
{
|
|
cft1st_16fx(n, a, w);
|
|
l = 8;
|
|
move16();
|
|
WHILE (sub(shl(l, 2),n) < 0)
|
|
{
|
|
cftmdl_16fx(n, l, a, w);
|
|
l = shl(l,2);
|
|
}
|
|
}
|
|
|
|
IF (sub(shl(l,2),n) == 0)
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
j2 = add(j1,l);
|
|
j3 = add(j2,l);
|
|
x0r = add(a[j],a[j1]);
|
|
x0i = add(a[j + 1],a[j1 + 1]);
|
|
x1r = sub(a[j],a[j1]);
|
|
x1i = sub(a[j + 1],a[j1 + 1]);
|
|
x2r = add(a[j2],a[j3]);
|
|
x2i = add(a[j2 + 1],a[j3 + 1]);
|
|
x3r = sub(a[j2],a[j3]);
|
|
x3i = sub(a[j2 + 1],a[j3 + 1]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
a[j2] = sub(x0r,x2r);
|
|
move16();
|
|
a[j2 + 1] = sub(x0i,x2i);
|
|
move16();
|
|
a[j1] = sub(x1r,x3i);
|
|
move16();
|
|
a[j1 + 1] = add(x1i,x3r);
|
|
move16();
|
|
a[j3] = add(x1r,x3i);
|
|
move16();
|
|
a[j3 + 1] = sub(x1i,x3r);
|
|
move16();
|
|
}
|
|
}
|
|
ELSE
|
|
{
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
x0r = sub(a[j],a[j1]);
|
|
x0i = sub(a[j + 1],a[j1 + 1]);
|
|
a[j] = add(a[j],a[j1]);
|
|
move16();
|
|
a[j + 1] = add(a[j + 1],a[j1 + 1]);
|
|
move16();
|
|
a[j1] = x0r;
|
|
move16();
|
|
a[j1 + 1] = x0i;
|
|
move16();
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cft1st()
|
|
* Subfunction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cft1st_16fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
const Word32 *w /* i : cos/sin table Q30*/
|
|
)
|
|
{
|
|
Word16 j, k1, k2;
|
|
Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
|
Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
Word16 tmp;
|
|
Word32 L_tmp;
|
|
|
|
x0r = add(a[0],a[2]);
|
|
x0i = add(a[1],a[3]);
|
|
x1r = sub(a[0],a[2]);
|
|
x1i = sub(a[1],a[3]);
|
|
x2r = add(a[4],a[6]);
|
|
x2i = add(a[5],a[7]);
|
|
x3r = sub(a[4],a[6]);
|
|
x3i = sub(a[5],a[7]);
|
|
a[0] = add(x0r,x2r);
|
|
move16();
|
|
a[1] = add(x0i,x2i);
|
|
move16();
|
|
a[4] = sub(x0r,x2r);
|
|
move16();
|
|
a[5] = sub(x0i,x2i);
|
|
move16();
|
|
a[2] = sub(x1r,x3i);
|
|
move16();
|
|
a[3] = add(x1i,x3r);
|
|
move16();
|
|
a[6] = add(x1r,x3i);
|
|
move16();
|
|
a[7] = sub(x1i,x3r);
|
|
move16();
|
|
|
|
wk1r = w[2];
|
|
move32();
|
|
|
|
x0r = add(a[8],a[10]);
|
|
x0i = add(a[9],a[11]);
|
|
x1r = sub(a[8],a[10]);
|
|
x1i = sub(a[9],a[11]);
|
|
x2r = add(a[12],a[14]);
|
|
x2i = add(a[13],a[15]);
|
|
x3r = sub(a[12],a[14]);
|
|
x3i = sub(a[13],a[15]);
|
|
a[8] = add(x0r,x2r);
|
|
move16();
|
|
a[9] = add(x0i,x2i);
|
|
move16();
|
|
a[12] = sub(x2i,x0i);
|
|
move16();
|
|
a[13] = sub(x0r,x2r);
|
|
move16();
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
tmp = sub(x0r,x0i);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
|
|
a[10] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = add(x0r,x0i);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
a[11] = round_fx(L_shl(L_tmp,1)); /* Q(Qx+Q_edct) */
|
|
|
|
x0r = add(x3i,x1r);
|
|
x0i = sub(x3r,x1i);
|
|
tmp = sub(x0i,x0r);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
a[14] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = add(x0i,x0r);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
a[15] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
k1 = 0;
|
|
move16();
|
|
|
|
FOR (j = 16; j < n; j += 16)
|
|
{
|
|
k1 = add(k1,2);
|
|
k2 = shl(k1,1);
|
|
|
|
wk2r = L_add(0,w[k1]);
|
|
wk2i = L_add(0,w[k1 + 1]);
|
|
wk1r = L_add(0,w[k2]);
|
|
wk1i = L_add(0,w[k2 + 1]);
|
|
|
|
L_tmp = L_shl(Mult_32_32(wk2i,wk1i),1);/*Q29 */
|
|
wk3r = L_sub(wk1r,L_shl(L_tmp,1));/*Q30 */
|
|
|
|
L_tmp = L_shl(Mult_32_32(wk2i,wk1r),1);/*Q29 */
|
|
wk3i = L_sub(L_shl(L_tmp,1),wk1i);/*Q30 */
|
|
|
|
x0r = add(a[j],a[j + 2]);
|
|
x0i = add(a[j + 1],a[j + 3]);
|
|
x1r = sub(a[j],a[j + 2]);
|
|
x1i = sub(a[j + 1],a[j + 3]);
|
|
x2r = add(a[j + 4],a[j + 6]);
|
|
x2i = add(a[j + 5],a[j + 7]);
|
|
x3r = sub(a[j + 4],a[j + 6]);
|
|
x3i = sub(a[j + 5],a[j + 7]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
|
|
x0r = sub(x0r,x2r);
|
|
x0i = sub(x0i,x2i);
|
|
L_tmp = Mult_32_16(wk2r,x0r);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk2i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 4] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk2r,x0i);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk2i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 5] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = add(x1r,x3i);
|
|
x0i = sub(x1i,x3r);
|
|
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 6] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 7] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
wk1r = L_add(0,w[k2 + 2]);
|
|
wk1i = L_add(0,w[k2 + 3]);
|
|
L_tmp = L_shl(Mult_32_32(wk2r,wk1i),1);/*Q29 */
|
|
wk3r = L_sub(wk1r,L_shl(L_tmp,1)); /*Q30 */
|
|
|
|
L_tmp = L_shl(Mult_32_32(wk2r,wk1r),1);/*Q29 */
|
|
wk3i = L_sub(L_shl(L_tmp,1),wk1i); /*Q30 */
|
|
|
|
x0r = add(a[j + 8],a[j + 10]);
|
|
x0i = add(a[j + 9],a[j + 11]);
|
|
x1r = sub(a[j + 8],a[j + 10]);
|
|
x1i = sub(a[j + 9],a[j + 11]);
|
|
x2r = add(a[j + 12],a[j + 14]);
|
|
x2i = add(a[j + 13],a[j + 15]);
|
|
x3r = sub(a[j + 12],a[j + 14]);
|
|
x3i = sub(a[j + 13],a[j + 15]);
|
|
a[j + 8] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 9] = add(x0i,x2i);
|
|
move16();
|
|
|
|
x0r = sub(x0r,x2r);
|
|
x0i = sub(x0i,x2i);
|
|
tmp = negate(x0r);
|
|
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk2r,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 12] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = negate(x0i);
|
|
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk2r,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 13] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 10] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 11] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = add(x1r,x3i);
|
|
x0i = sub(x1i,x3r);
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j + 14] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j + 15] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
|
|
* cftmdl()
|
|
* Subfunction of Complex Discrete Fourier Transform
|
|
*-----------------------------------------------------------------*/
|
|
static void cftmdl_16fx(
|
|
Word16 n, /* i : data length of real and imag */
|
|
Word16 l, /* i : initial shift for processing */
|
|
Word16 *a, /* i/o : input/output data Q(Qx+Q_edct)*/
|
|
const Word32 *w /* i : cos/sin table Q30*/
|
|
)
|
|
{
|
|
Word16 j, j1, j2, j3, k, k1, k2, m, m2;
|
|
Word32 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
|
Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
Word16 tmp, tmp2;
|
|
Word32 L_tmp;
|
|
Word32 L_x0r, L_x0i;
|
|
|
|
m = shl(l, 2);
|
|
move16();
|
|
FOR (j = 0; j < l; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
j2 = add(j1,l);
|
|
j3 = add(j2,l);
|
|
x0r = add(a[j],a[j1]);
|
|
x0i = add(a[j + 1],a[j1 + 1]);
|
|
x1r = sub(a[j],a[j1]);
|
|
x1i = sub(a[j + 1],a[j1 + 1]);
|
|
x2r = add(a[j2],a[j3]);
|
|
x2i = add(a[j2 + 1],a[j3 + 1]);
|
|
x3r = sub(a[j2],a[j3]);
|
|
x3i = sub(a[j2 + 1],a[j3 + 1]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
a[j2] = sub(x0r,x2r);
|
|
move16();
|
|
a[j2 + 1] = sub(x0i,x2i);
|
|
move16();
|
|
a[j1] = sub(x1r,x3i);
|
|
move16();
|
|
a[j1 + 1] = add(x1i,x3r);
|
|
move16();
|
|
a[j3] = add(x1r,x3i);
|
|
move16();
|
|
a[j3 + 1] = sub(x1i,x3r);
|
|
move16();
|
|
}
|
|
|
|
wk1r = w[2];
|
|
move32();
|
|
tmp2 = add(l,m);
|
|
FOR (j = m; j < tmp2; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
j2 = add(j1,l);
|
|
j3 = add(j2,l);
|
|
x0r = add(a[j],a[j1]);
|
|
x0i = add(a[j + 1],a[j1 + 1]);
|
|
x1r = sub(a[j],a[j1]);
|
|
x1i = sub(a[j + 1],a[j1 + 1]);
|
|
x2r = add(a[j2],a[j3]);
|
|
x2i = add(a[j2 + 1],a[j3 + 1]);
|
|
x3r = sub(a[j2],a[j3]);
|
|
x3i = sub(a[j2 + 1],a[j3 + 1]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
a[j2] = sub(x2i,x0i);
|
|
move16();
|
|
a[j2 + 1] = sub(x0r,x2r);
|
|
move16();
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
tmp = sub(x0r,x0i);
|
|
L_tmp = Mult_32_16(wk1r,tmp);/*Q(15+Qx+Q_edct) */
|
|
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = add(x0r,x0i);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = add(x3i,x1r);
|
|
x0i = sub(x3r,x1i);
|
|
tmp = sub(x0i,x0r);
|
|
L_tmp = Mult_32_16(wk1r,tmp);/*Q(15+Qx+Q_edct) */
|
|
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = add(x0i,x0r);
|
|
L_tmp = Mult_32_16(wk1r,tmp); /*Q(15+Qx+Q_edct) */
|
|
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
}
|
|
|
|
k1 = 0;
|
|
move16();
|
|
m2 = shl(m,1);
|
|
FOR (k = m2; k < n; k += m2)
|
|
{
|
|
k1 = add(k1,2);
|
|
k2 = shl(k1,1);
|
|
wk2r = L_add(0,w[k1]);
|
|
wk2i = L_add(0,w[k1 + 1]);
|
|
wk1r = L_add(0,w[k2]);
|
|
wk1i = L_add(0,w[k2 + 1]);
|
|
L_tmp = L_shl(Mult_32_32(wk2i,wk1i),1);/*Q29 */
|
|
wk3r = L_sub(wk1r,L_shl(L_tmp,1));/*Q30 */
|
|
|
|
L_tmp = L_shl(Mult_32_32(wk2i,wk1r),1);/*Q29 */
|
|
wk3i = L_sub(L_shl(L_tmp,1),wk1i);/*Q30 */
|
|
|
|
tmp2 = add(l,k);
|
|
FOR (j = k; j < tmp2; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
j2 = add(j1,l);
|
|
j3 = add(j2,l);
|
|
x0r = add(a[j],a[j1]);
|
|
x0i = add(a[j + 1],a[j1 + 1]);
|
|
x1r = sub(a[j],a[j1]);
|
|
x1i = sub(a[j + 1],a[j1 + 1]);
|
|
x2r = add(a[j2],a[j3]);
|
|
x2i = add(a[j2 + 1],a[j3 + 1]);
|
|
x3r = sub(a[j2],a[j3]);
|
|
x3i = sub(a[j2 + 1],a[j3 + 1]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
|
|
x0r = sub(x0r,x2r);
|
|
x0i = sub(x0i,x2i);
|
|
|
|
L_tmp = Mult_32_16(wk2r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk2i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk2r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk2i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j2 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_x0r = L_add((Word32) x1r, (Word32) x3i);
|
|
L_x0i = L_sub((Word32) x1i, (Word32) x3r);
|
|
x0r = extract_l(L_x0r);
|
|
x0i = extract_l(L_x0i);
|
|
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
}
|
|
|
|
wk1r = w[k2 + 2];
|
|
move32();
|
|
wk1i = w[k2 + 3];
|
|
move32();
|
|
L_tmp = L_shl(Mult_32_32(wk2r,wk1i),1);/*Q29 */
|
|
wk3r = L_sub(wk1r,L_shl(L_tmp,1)); /*Q30 */
|
|
|
|
L_tmp = L_shl(Mult_32_32(wk2r,wk1r),1);/*Q29 */
|
|
wk3i = L_sub(L_shl(L_tmp,1),wk1i); /*Q30 */
|
|
|
|
tmp2 = add(l,add(k,m));
|
|
FOR (j = add(k,m); j < tmp2; j += 2)
|
|
{
|
|
j1 = add(j,l);
|
|
j2 = add(j1,l);
|
|
j3 = add(j2,l);
|
|
x0r = add(a[j],a[j1]);
|
|
x0i = add(a[j + 1],a[j1 + 1]);
|
|
x1r = sub(a[j],a[j1]);
|
|
x1i = sub(a[j + 1],a[j1 + 1]);
|
|
x2r = add(a[j2],a[j3]);
|
|
x2i = add(a[j2 + 1],a[j3 + 1]);
|
|
x3r = sub(a[j2],a[j3]);
|
|
x3i = sub(a[j2 + 1],a[j3 + 1]);
|
|
a[j] = add(x0r,x2r);
|
|
move16();
|
|
a[j + 1] = add(x0i,x2i);
|
|
move16();
|
|
|
|
x0r = sub(x0r,x2r);
|
|
x0i = sub(x0i,x2i);
|
|
|
|
tmp = negate(x0r);
|
|
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk2r,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j2] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
tmp = negate(x0i);
|
|
L_tmp = Mult_32_16(wk2i,tmp);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk2r,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j2 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = sub(x1r,x3i);
|
|
x0i = add(x1i,x3r);
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0r);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk1i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk1r,x0i);/*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk1i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j1 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
x0r = add(x1r,x3i);
|
|
x0i = sub(x1i,x3r);
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0r); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Msub_32_16(L_tmp,wk3i,x0i); /*Q(15+Qx+Q_edct) */
|
|
a[j3] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
|
|
L_tmp = Mult_32_16(wk3r,x0i); /*Q(15+Qx+Q_edct) */
|
|
L_tmp = Madd_32_16(L_tmp,wk3i,x0r); /*Q(15+Qx+Q_edct) */
|
|
a[j3 + 1] = round_fx(L_shl(L_tmp,1)); /*Q(Qx+Q_edct) */
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*
 * fft3_fx()
 *
 * Real FFT of length n = 3*m built from three length-m FFTs.
 * X[] is split into the decimated sequences X[3i], X[3i+1], X[3i+2],
 * each is transformed in place with r_fft_fx_lc(), and the three
 * spectra are combined with order-3 butterflies using the
 * sincos_t_rad3_fx[] table.
 *
 * n == 1536 selects m = 512; every other value of n is handled with
 * the n == 384 settings (m = 128).
 *
 * Output layout: real parts are written upwards from Y[0], imaginary
 * parts downwards from Y[n-1].
 *-----------------------------------------------------------------*/
void fft3_fx(const Word16 X[], Word16 Y[], const Word16 n)
{
    Word16 Z[PH_ECU_SPEC_SIZE];
    Word16 *Z0, *Z1, *Z2;
    Word16 *pz0, *pz1, *pz2;
    const Word16 *px;
    const Word16 *tab = sincos_t_rad3_fx;
    Word16 m, mMinus1, step;
    Word16 it, cnt;
    Word16 c1_ind, s1_ind, c2_ind, s2_ind;
    Word16 c1_step, s1_step, c2_step, s2_step;
    Word16 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
    Word32 L_acc;
    Word16 halfM, orderMinus1;
    const Word16 *pPhaseTbl;

    /* Determine the order of the transform, the length of decimated */
    /* transforms m, and the step for the sine and cosine tables.    */
    SWITCH(n)
    {
    case 1536:
        orderMinus1 = 9-1;
        move16();
        m = 512;
        move16();
        step = 1;
        move16();
        pPhaseTbl = FFT_W256;
        BREAK;
    case 384:
    default:
        orderMinus1 = 7-1;
        move16();
        m = 128;
        move16();
        step = 4;
        move16();
        pPhaseTbl = FFT_W64;
        BREAK;
    }

    /* Compose decimated sequences X[3i], X[3i+1], X[3i+2] */
    /* and compute their FFT of length m.                  */
    Z0 = Z;
    pz0 = Z0;
    Z1 = Z0 + m;        /* Z1 = &Z[ m]; */
    pz1 = Z1;
    Z2 = Z1 + m;        /* Z2 = &Z[2m]; */
    pz2 = Z2;
    px = X;
    FOR (it = 0; it < m; it++)
    {
        *pz0++ = *px++;     /* Z0[i] = X[3i];   */
        move16();
        *pz1++ = *px++;     /* Z1[i] = X[3i+1]; */
        move16();
        *pz2++ = *px++;     /* Z2[i] = X[3i+2]; */
        move16();
    }
    halfM = shr(m, 1);
    r_fft_fx_lc(pPhaseTbl, m, halfM, orderMinus1, Z0, Z0, 1);
    r_fft_fx_lc(pPhaseTbl, m, halfM, orderMinus1, Z1, Z1, 1);
    r_fft_fx_lc(pPhaseTbl, m, halfM, orderMinus1, Z2, Z2, 1);

    /* Butterflies of order 3. */
    /* pointer initialization: R* pointers start at the first element, */
    /* I* pointers at the last element of the respective buffer        */
    mMinus1 = sub(m, 1);
    RY = Y;
    IY = Y + n;
    IY--;               /* Decrement the address counter. */
    RZ0 = Z0;
    IZ0 = Z0 + mMinus1;
    RZ1 = Z1;
    IZ1 = Z1 + mMinus1;
    RZ2 = Z2;
    IZ2 = Z2 + mMinus1;

    /* table indices: index 1 moves by step, index 2 by 2*step */
    c1_step = negate(step);
    s1_step = step;
    move16();
    c2_step = shl(c1_step, 1);
    s2_step = shl(s1_step, 1);
    c1_ind = add(T_SIN_PI_2, c1_step);
    s1_ind = s1_step;
    move16();
    c2_ind = add(T_SIN_PI_2, c2_step);
    s2_ind = s2_step;
    move16();

    /* special case: i = 0 */
    L_acc = L_mult(*RZ0++, 0x4000);
    L_acc = L_mac(L_acc, *RZ1++, 0x4000);
    *RY++ = mac_r(L_acc, *RZ2++, 0x4000);
    move16();

    /* first 3/12 -- from 1 to (3*m/8)-1 */
    cnt = sub(shr(n, 3), 1);    /* (3*m/8) - 1 = (n/8) - 1 */
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0++, 15);                  /* Align with the following non-fractional mode so as to gain 1 more bit headroom. */
        L_acc = L_mac0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_mac0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_mac0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_mac0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);                    /* bit growth = 1 (compensated by non-fractional mode MAC). */

        L_acc = L_shl(*IZ0--, 15);
        L_acc = L_msu0(L_acc, *RZ1++, tab[s1_ind]);
        L_acc = L_mac0(L_acc, *IZ1--, tab[c1_ind]);
        L_acc = L_msu0(L_acc, *RZ2++, tab[s2_ind]);
        L_acc = L_mac0(L_acc, *IZ2--, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = add(c1_ind, c1_step);
        s1_ind = add(s1_ind, s1_step);
        c2_ind = add(c2_ind, c2_step);
        s2_ind = add(s2_ind, s2_step);
    }

    /* next 1/12 -- from (3*m/8) to (4*m/8)-1 */
    cnt = shr(m, 3);            /* (4*m/8) - (3*m/8) = m/8 */
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0++, 15);
        L_acc = L_mac0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_mac0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_mac0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);

        L_acc = L_shl(*IZ0--, 15);
        L_acc = L_msu0(L_acc, *RZ1++, tab[s1_ind]);
        L_acc = L_mac0(L_acc, *IZ1--, tab[c1_ind]);
        L_acc = L_msu0(L_acc, *RZ2++, tab[s2_ind]);
        L_acc = L_msu0(L_acc, *IZ2--, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = add(c1_ind, c1_step);
        s1_ind = add(s1_ind, s1_step);
        c2_ind = sub(c2_ind, c2_step);
        s2_ind = sub(s2_ind, s2_step);
    }

    /* special case: i = m/2 i.e. 1/3 */
    L_acc = L_shl(*RZ0--, 15);
    L_acc = L_mac0(L_acc, *RZ1, tab[c1_ind]);
    L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
    *RY++ = round_fx(L_acc);

    L_acc = 0;
    L_acc = L_msu0(L_acc, *RZ1--, tab[s1_ind]);
    L_acc = L_msu0(L_acc, *RZ2--, tab[s2_ind]);
    *IY-- = round_fx(L_acc);
    IZ0++;                      /* the read pointers turn around here */
    IZ1++;
    IZ2++;

    c1_ind = add(c1_ind, c1_step);
    s1_ind = add(s1_ind, s1_step);
    c2_ind = sub(c2_ind, c2_step);
    s2_ind = sub(s2_ind, s2_step);

    /* next 2/12 -- from ((m/2)+1) to (6*m/8)-1 */
    cnt = sub(shr(m, 2), 1);    /* (6*m/8) - ((m/2)+1) = m/4 - 1 */
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0--, 15);
        L_acc = L_mac0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_msu0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_msu0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);

        L_acc = L_mult0(*IZ0++, -32768);
        L_acc = L_msu0(L_acc, *RZ1--, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *IZ1++, tab[c1_ind]);
        L_acc = L_msu0(L_acc, *RZ2--, tab[s2_ind]);
        L_acc = L_mac0(L_acc, *IZ2++, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = add(c1_ind, c1_step);
        s1_ind = add(s1_ind, s1_step);
        c2_ind = sub(c2_ind, c2_step);
        s2_ind = sub(s2_ind, s2_step);
    }

    /*--------------------------half--------------------------*/
    /* next 2/12 -- from (6*m/8) to (8*m/8) - 1 */
    cnt = shr(m, 2);
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0--, 15);
        L_acc = L_msu0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_msu0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_mac0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);

        L_acc = L_mult0(*IZ0++, -32768);
        L_acc = L_msu0(L_acc, *RZ1--, tab[s1_ind]);
        L_acc = L_mac0(L_acc, *IZ1++, tab[c1_ind]);
        L_acc = L_mac0(L_acc, *RZ2--, tab[s2_ind]);
        L_acc = L_mac0(L_acc, *IZ2++, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = sub(c1_ind, c1_step);
        s1_ind = sub(s1_ind, s1_step);
        c2_ind = add(c2_ind, c2_step);
        s2_ind = add(s2_ind, s2_step);
    }

    /* special case: i = m, i.e 2/3 */
    L_acc = L_shl(*RZ0++, 15);
    L_acc = L_msu0(L_acc, *RZ1, tab[c1_ind]);
    L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
    *RY++ = round_fx(L_acc);

    L_acc = L_deposit_l(0);
    L_acc = L_msu0(L_acc, *RZ1++, tab[s1_ind]);
    L_acc = L_mac0(L_acc, *RZ2++, tab[s2_ind]);
    *IY-- = round_fx(L_acc);
    IZ0--;                      /* Just decrement the address counter */
    IZ1--;
    IZ2--;

    c1_ind = sub(c1_ind, c1_step);
    s1_ind = sub(s1_ind, s1_step);
    c2_ind = add(c2_ind, c2_step);
    s2_ind = add(s2_ind, s2_step);

    /* next 1/12 -- from (m + 1) to (9*m/8) - 1 */
    cnt = sub(shr(m, 3), 1);    /* (9*m/8) - (m +1) = m/8 - 1 */
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0++, 15);
        L_acc = L_msu0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_mac0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_msu0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);

        L_acc = L_shl(*IZ0--, 15);
        L_acc = L_msu0(L_acc, *RZ1++, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *IZ1--, tab[c1_ind]);
        L_acc = L_mac0(L_acc, *RZ2++, tab[s2_ind]);
        L_acc = L_msu0(L_acc, *IZ2--, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = sub(c1_ind, c1_step);
        s1_ind = sub(s1_ind, s1_step);
        c2_ind = add(c2_ind, c2_step);
        s2_ind = add(s2_ind, s2_step);
    }

    /* last 3/12 -- from (9*m/8) to (12*m/8) - 1 */
    cnt = shr(n, 3);            /* (12*m/8) - (9*m/8) = 3*m/8 = n/8 */
    FOR (it = 0; it < cnt; it++)
    {
        L_acc = L_shl(*RZ0++, 15);
        L_acc = L_msu0(L_acc, *RZ1, tab[c1_ind]);   /* Non-fractional mode gains 1 more bit headroom. */
        L_acc = L_mac0(L_acc, *IZ1, tab[s1_ind]);
        L_acc = L_mac0(L_acc, *RZ2, tab[c2_ind]);
        L_acc = L_msu0(L_acc, *IZ2, tab[s2_ind]);
        *RY++ = round_fx(L_acc);

        L_acc = L_shl(*IZ0--, 15);
        L_acc = L_msu0(L_acc, *RZ1++, tab[s1_ind]);
        L_acc = L_msu0(L_acc, *IZ1--, tab[c1_ind]);
        L_acc = L_mac0(L_acc, *RZ2++, tab[s2_ind]);
        L_acc = L_mac0(L_acc, *IZ2--, tab[c2_ind]);
        *IY-- = round_fx(L_acc);

        c1_ind = sub(c1_ind, c1_step);
        s1_ind = sub(s1_ind, s1_step);
        c2_ind = sub(c2_ind, c2_step);
        s2_ind = sub(s2_ind, s2_step);
    }

    /* special case: i = 3*m/2 */
    L_acc = L_shl(*RZ0, 15);
    L_acc = L_msu0(L_acc, *RZ1, tab[c1_ind]);
    L_acc = L_mac0(L_acc, *RZ2, tab[c2_ind]);
    *RY = round_fx(L_acc);

    return;
}
|
|
|
|
|
|
/*-----------------------------------------------------------------*
 * ifft3_fx()
 *
 * Fixed-point inverse transform of length n = 3 x m. The input is
 * combined by order-3 inverse butterflies into three length-m
 * blocks (Y0, Y1, Y2), each block is passed in place through
 * r_fft_fx_lc(), and the three results are interleaved into X.
 *
 * Z[] : input, indexed from Z[0] up to Z[n-1]. The RZx pointers walk
 *       upwards from the start and the IZx pointers walk downwards
 *       from the end (presumably real and imaginary parts - confirm
 *       against the forward transform). Z[n] is only used as an
 *       address and is never dereferenced.
 * X[] : output, 3 x m values written as (block0[i], block1[i],
 *       block2[i]) triplets.
 * n   : transform length. 1536 (m = 512) and 384 (m = 128) have
 *       explicit cases; any other value selects the m = 128
 *       settings while Z is still indexed with the given n.
 *
 * NOTE(review): the local buffer Y must hold 3 x m values, i.e.
 * this assumes PH_ECU_SPEC_SIZE >= 1536 - confirm in cnst_fx.h.
 *-----------------------------------------------------------------*/
void ifft3_fx(const Word16 Z[], Word16 X[], const Word16 n)
|
|
{
|
|
/* Work buffer: the three length-m blocks Y0, Y1, Y2 are stored back to back. */
Word16 Y[PH_ECU_SPEC_SIZE];
|
|
const Word16 *t_sin = sincos_t_rad3_fx;
|
|
Word16 m, mMinus1, step, step2;
|
|
Word16 i, l;
|
|
/* Indices into t_sin for the three twiddle factors (c = cosine role, s = sine role). */
Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind;
|
|
const Word16 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
|
|
/* Start positions of the six read pointers; they are restored before each block. */
const Word16 *RZ00, *IZ00, *RZ10, *IZ10, *RZ20, *IZ20;
|
|
Word16 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2, *pX;
|
|
Word32 acc;
|
|
Word16 mBy2, orderMinus1, nMinusMBy2;
|
|
const Word16 *pPhaseTbl;
|
|
|
|
/* Determine the order of the transform, the length of decimated */
|
|
/* transforms m, and the step for the sine and cosine tables. */
|
|
SWITCH(n)
|
|
{
|
|
case 1536:
|
|
orderMinus1 = 9-1;
|
|
move16();
|
|
m = 512;
|
|
move16();
|
|
step = 1;
|
|
move16();
|
|
pPhaseTbl = FFT_W256;
|
|
BREAK;
|
|
case 384:
|
|
orderMinus1 = 7-1;
|
|
move16();
|
|
m = 128;
|
|
move16();
|
|
step = 4;
|
|
move16();
|
|
pPhaseTbl = FFT_W64;
|
|
BREAK;
|
|
/* Any other length falls back to exactly the same settings as n = 384. */
default:
|
|
orderMinus1 = 7-1;
|
|
move16();
|
|
m = 128;
|
|
move16();
|
|
step = 4;
|
|
move16();
|
|
pPhaseTbl = FFT_W64;
|
|
BREAK;
|
|
}
|
|
|
|
/* (n - m)/2; this equals m for both explicitly supported lengths. */
nMinusMBy2 = shr(sub(n, m),1);
|
|
mMinus1 = sub(m,1);
|
|
/* pointer initialization */
|
|
/* In every block the RYx pointer is written upwards from the block  */
/* start and the IYx pointer is written downwards from the block end. */
RY0 = &Y[0];
|
|
IY0 = &Y[m];
|
|
RY1 = &RY0[m];
|
|
IY1 = &RY1[mMinus1];
|
|
RY2 = &RY1[m];
|
|
IY2 = &RY2[mMinus1];
|
|
|
|
RZ00 = &Z[0]; /* The zero positions of the pointers */
|
|
RZ10 = &RZ00[m];
|
|
RZ20 = &RZ00[nMinusMBy2];
|
|
IZ00 = &Z[n];
|
|
IZ10 = &IZ00[-m];
|
|
IZ20 = &IZ00[-nMinusMBy2];
|
|
|
|
RZ0 = RZ00; /* Reset the pointers to zero positions. */
|
|
RZ1 = RZ10;
|
|
RZ2 = RZ20;
|
|
IZ0 = IZ00;
|
|
IZ1 = IZ10;
|
|
IZ2 = IZ20;
|
|
|
|
/* Inverse butterflies of order 3. */
|
|
|
|
/* Construction of Y0 */
|
|
/* Y0 combines the three branches with a constant weight of 0x4000 each. */
/* Index 0 produces only an RY0 value.                                   */
acc = L_mult(*RZ0++, 0x4000);
|
|
acc = L_mac(acc, *RZ1++, 0x4000);
|
|
*RY0++ = mac_r(acc, *RZ2--, 0x4000);
|
|
move16();
|
|
/* Step the IZx read pointers and the IY0 write pointer without accessing them. */
IZ0--;
|
|
IZ1--;
|
|
IZ2++;
|
|
IY0--;
|
|
|
|
/* Indices 1 .. m/2 - 1: one RY0 and one IY0 value per iteration. */
l = sub(shr(m, 1), 1);
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult(*RZ0++, 0x4000);
|
|
acc = L_mac(acc, *RZ1++, 0x4000);
|
|
*RY0++ = mac_r(acc, *RZ2--, 0x4000);
|
|
move16();
|
|
|
|
/* The IZ2 branch enters with a negative sign (msu_r). */
acc = L_mult(*IZ0--, 0x4000);
|
|
acc = L_mac(acc, *IZ1--, 0x4000);
|
|
*IY0-- = msu_r(acc, *IZ2++, 0x4000);
|
|
move16();
|
|
}
|
|
|
|
/* m/2 */
|
|
acc = L_mult(*RZ0, 0x4000);
|
|
acc = L_mac(acc, *RZ1, 0x4000);
|
|
*RY0++ = mac_r(acc, *RZ2, 0x4000);
|
|
move16();
|
|
|
|
|
|
/* Construction of Y1 */
|
|
/* Start indices into t_sin: c0 at T_SIN_PI_2, s0 at 0, and the c1/c2 and */
/* s1/s2 pairs at one third and two thirds of T_SIN_PI_2.                 */
/* NOTE(review): presumably t_sin is a quarter-wave sine table with       */
/* T_SIN_PI_2 as the index of pi/2 - confirm against sincos_t_rad3_fx.    */
c0_ind=T_SIN_PI_2;
|
|
s0_ind=0;
|
|
c1_ind=T_SIN_PI_2*1/3;
|
|
s1_ind=T_SIN_PI_2*2/3;
|
|
c2_ind=T_SIN_PI_2*1/3;
|
|
s2_ind=T_SIN_PI_2*2/3;
|
|
|
|
RZ0 = RZ00; /* Reset pointers to zero positions. */
|
|
RZ1 = RZ10;
|
|
RZ2 = RZ20;
|
|
IZ0 = IZ00;
|
|
IZ1 = IZ10;
|
|
IZ2 = IZ20;
|
|
/* Index 0: no IZ0 term is accumulated (IZ0 is only stepped) and only RY1 is written. */
acc = L_mult0(*RZ0++, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[c2_ind]);
|
|
IZ0--;
|
|
acc = L_msu0(acc, *IZ1--, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2++, t_sin[s2_ind]);
|
|
*RY1++ = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step);
|
|
s0_ind=add(s0_ind,step);
|
|
c1_ind=add(c1_ind,step);
|
|
s1_ind=sub(s1_ind,step);
|
|
c2_ind=sub(c2_ind,step);
|
|
s2_ind=add(s2_ind,step);
|
|
|
|
/* From 1 to (m/4) - 1. */
|
|
l = sub(shr(m,2),1);
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
/* RY1 value: the read pointers are not advanced here because the */
/* same six inputs are reused for the IY1 value below.            */
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY1++ = round_fx(acc);
|
|
|
|
/* IY1 value: all six read pointers are stepped while being used. */
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY1-- = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step);
|
|
s0_ind=add(s0_ind,step);
|
|
c1_ind=add(c1_ind,step);
|
|
s1_ind=sub(s1_ind,step);
|
|
c2_ind=sub(c2_ind,step);
|
|
s2_ind=add(s2_ind,step);
|
|
}
|
|
|
|
/* From m/4 to m/2 -1. */
|
|
/* Same butterfly as the previous loop except that both c2 terms change */
/* sign and c2_ind / s2_ind now move in the opposite direction.         */
l = shr(m, 2); /* m/2 - m/4 = m/4 */
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY1++ = round_fx(acc);
|
|
|
|
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY1-- = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step);
|
|
s0_ind=add(s0_ind,step);
|
|
c1_ind=add(c1_ind,step);
|
|
s1_ind=sub(s1_ind,step);
|
|
c2_ind=add(c2_ind,step);
|
|
s2_ind=sub(s2_ind,step);
|
|
}
|
|
|
|
/* m/2 */
|
|
/* Final RY1 value only; uses the same signs as the loop just above. */
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY1++ = round_fx(acc);
|
|
|
|
/* Construction of Y2 */
|
|
/* Same start indices as for Y1, but the table indices advance by */
/* step2 = 2 x step per output value.                              */
c0_ind=T_SIN_PI_2;
|
|
s0_ind=0;
|
|
c1_ind=T_SIN_PI_2*1/3;
|
|
s1_ind=T_SIN_PI_2*2/3;
|
|
c2_ind=T_SIN_PI_2*1/3;
|
|
s2_ind=T_SIN_PI_2*2/3;
|
|
step2 = shl(step,1);
|
|
|
|
RZ0 = RZ00; /* Reset pointers to zero positions. */
|
|
RZ1 = RZ10;
|
|
RZ2 = RZ20;
|
|
IZ0 = IZ00;
|
|
IZ1 = IZ10;
|
|
IZ2 = IZ20;
|
|
/* Index 0: no IZ0 term is accumulated (IZ0 is only stepped) and only RY2 is written. */
acc = L_mult0(*RZ0++, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[c2_ind]);
|
|
IZ0--;
|
|
acc = L_mac0(acc, *IZ1--, t_sin[s1_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step2);
|
|
s0_ind=add(s0_ind,step2);
|
|
c1_ind=sub(c1_ind,step2);
|
|
s1_ind=add(s1_ind,step2);
|
|
c2_ind=add(c2_ind,step2);
|
|
s2_ind=sub(s2_ind,step2);
|
|
|
|
/* From 1 to (m/8) - 1. */
|
|
l = sub(shr(m, 3),1); /* m/8 - 1. */
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *IZ1--, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_mac0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY2-- = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step2);
|
|
s0_ind=add(s0_ind,step2);
|
|
c1_ind=sub(c1_ind,step2);
|
|
s1_ind=add(s1_ind,step2);
|
|
c2_ind=add(c2_ind,step2);
|
|
s2_ind=sub(s2_ind,step2);
|
|
}
|
|
|
|
/* From (m/8) to (m/4) - 1. */
|
|
/* Compared with the previous loop, both c1 terms change sign and */
/* c1_ind / s1_ind reverse direction.                             */
l = shr(m, 3); /* m/4 - m/8 = m/8 */
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_mac0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
|
|
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_mac0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY2-- = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step2);
|
|
s0_ind=add(s0_ind,step2);
|
|
c1_ind=add(c1_ind,step2);
|
|
s1_ind=sub(s1_ind,step2);
|
|
c2_ind=add(c2_ind,step2);
|
|
s2_ind=sub(s2_ind,step2);
|
|
}
|
|
|
|
/* From m/4 to 3*m/8 - 1. */
|
|
/* Compared with the previous loop, both s2 terms change sign and */
/* c2_ind / s2_ind reverse direction.                             */
l = shr(m, 3); /* 3*m/8 - m/4 = m/8 */
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult0(*RZ0, t_sin[c0_ind]);
|
|
acc = L_mac0(acc, *RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
acc = L_mult0(*IZ0--, t_sin[c0_ind]);
|
|
acc = L_mac0(acc, *IZ1--, t_sin[c1_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY2-- = round_fx(acc);
|
|
|
|
c0_ind=sub(c0_ind,step2);
|
|
s0_ind=add(s0_ind,step2);
|
|
c1_ind=add(c1_ind,step2);
|
|
s1_ind=sub(s1_ind,step2);
|
|
c2_ind=sub(c2_ind,step2);
|
|
s2_ind=add(s2_ind,step2);
|
|
}
|
|
|
|
/* From 3*m/8 to m/2 - 1*/
|
|
/* Compared with the previous loop, both c0 terms change sign (the  */
/* accumulation now starts from the c1 product) and c0_ind / s0_ind */
/* reverse direction.                                               */
l = shr(m, 3); /* m/2 - 3*m/8 = m/8 */
|
|
FOR (i = 0; i < l; i++)
|
|
{
|
|
acc = L_mult0(*RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
acc = L_mult0(*IZ1--, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *IZ0--, t_sin[c0_ind]);
|
|
acc = L_mac0(acc, *IZ2++, t_sin[c2_ind]);
|
|
acc = L_mac0(acc, *RZ0++, t_sin[s0_ind]);
|
|
acc = L_msu0(acc, *RZ1++, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *RZ2--, t_sin[s2_ind]);
|
|
*IY2-- = round_fx(acc);
|
|
|
|
c0_ind=add(c0_ind,step2);
|
|
s0_ind=sub(s0_ind,step2);
|
|
c1_ind=add(c1_ind,step2);
|
|
s1_ind=sub(s1_ind,step2);
|
|
c2_ind=sub(c2_ind,step2);
|
|
s2_ind=add(s2_ind,step2);
|
|
}
|
|
|
|
/* m/2 */
|
|
/* Final RY2 value only; uses the same signs as the loop just above. */
acc = L_mult0(*RZ1, t_sin[c1_ind]);
|
|
acc = L_msu0(acc, *RZ0, t_sin[c0_ind]);
|
|
acc = L_msu0(acc, *RZ2, t_sin[c2_ind]);
|
|
acc = L_msu0(acc, *IZ0, t_sin[s0_ind]);
|
|
acc = L_mac0(acc, *IZ1, t_sin[s1_ind]);
|
|
acc = L_msu0(acc, *IZ2, t_sin[s2_ind]);
|
|
*RY2++ = round_fx(acc);
|
|
|
|
/* Compute the inverse FFT for all 3 blocks. */
|
|
RY0 = &Y[0]; /* Rewind the pointers. */
|
|
RY1 = &Y[m];
|
|
RY2 = &RY1[m];
|
|
mBy2 = shr(m,1);
|
|
/* Each block is transformed in place: the same pointer is passed as input and output. */
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY0, RY0, 0); /* inverse FFT */
|
|
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY1, RY1, 0); /* inverse FFT */
|
|
r_fft_fx_lc(pPhaseTbl, m, mBy2, orderMinus1, RY2, RY2, 0); /* inverse FFT */
|
|
|
|
y0 = RY0;
|
|
y1 = RY1;
|
|
y2 = RY2;
|
|
|
|
/* Interlacing and scaling, scale = 1/3 */
|
|
/* X[3i], X[3i+1], X[3i+2] receive sample i of block 0, 1 and 2. Each  */
/* sample is multiplied by FFT3_ONE_THIRD with mult_r and then shifted */
/* left by one bit.                                                    */
pX = X;
|
|
FOR (i = 0; i < m; i++)
|
|
{
|
|
*pX++ = shl(mult_r(*y0++, FFT3_ONE_THIRD), 1);
|
|
move16();
|
|
*pX++ = shl(mult_r(*y1++, FFT3_ONE_THIRD), 1);
|
|
move16();
|
|
*pX++ = shl(mult_r(*y2++, FFT3_ONE_THIRD), 1);
|
|
move16();
|
|
}
|
|
|
|
return;
|
|
}
|