rtphone/src/libs/libevs/lib_enc/arith_coder_enc.cpp

629 lines
20 KiB
C++

/*====================================================================================
EVS Codec 3GPP TS26.443 Nov 13, 2018. Version 12.11.0 / 13.7.0 / 14.3.0 / 15.1.0
====================================================================================*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include "options.h"
#include "cnst.h"
#include "prot.h"
#include "rom_com.h"
#include "basop_util.h"
#include "basop_proto_func.h"
namespace evs {
/*-------------------------------------------------------------------*
* Local constants
*-------------------------------------------------------------------*/
/* log2(e); the rate loop multiplies natural-log based quantities by this
 * factor when it accumulates its bit estimate */
#define LOG2_E 1.44269504089f
/* Number of live entries kept in the heap used by tcx_arith_find_max_scale().
 * The heap array itself holds 2*kMaxNumHeapElems+1 slots, so that both child
 * slots (2*i+1 and 2*i+2) of every live entry i exist. */
#define kMaxNumHeapElems 10
/* One heap entry: a score and the position it was measured at. */
typedef struct HeapElem
{
float mScore; /* Sort key: the heap is ordered on this value */
int mIndex; /* Original index: spectral line the score belongs to */
} HeapElem;
/* Array-backed binary min-heap: the children of slot i live in slots 2*i+1
 * and 2*i+2. MinHeapify_i() reads both child slots without checking them
 * against mSize, so the array is larger than the number of live entries;
 * tcx_arith_find_max_scale() fills the extra slots with FLT_MAX scores. */
typedef struct Heap
{
HeapElem mElem[2*kMaxNumHeapElems+1]; /* live entries first, then filler slots */
int mSize; /* number of live entries (slots 0..mSize-1) */
} Heap;
/*-------------------------------------------------------------------*
 * tcx_arith_estimate_scale()
 *
 * First guess for the scalar-quantizer scale, derived from the energy
 * of the envelope-weighted spectrum:
 *
 *   scale = sqrt( 4 * L_frame * 2^(2*(15-envelope_e))
 *                 / (0.01 + sum_k (abs_spectrum[k]*envelope[k])^2) )
 *-------------------------------------------------------------------*/
static float tcx_arith_estimate_scale(  /* o: estimated SQ scale                */
    const float abs_spectrum[],         /* i: absolute MDCT coefficients        */
    int L_frame,                        /* i: number of spectral lines          */
    const Word16 envelope[],            /* i: scaled envelope (Q15-e)           */
    Word16 envelope_e                   /* i: scaled envelope exponent (Q0)     */
)
{
    float scale, tmp;
    int k;

    /* compute normalised standard deviation and determine approximate scale */
    scale = 0.01f;  /* non-zero start value keeps the division below finite for an all-zero input */
    for (k = 0; k < L_frame; k++)
    {
        tmp = abs_spectrum[k] * envelope[k];
        scale += tmp * tmp;
    }

    /* tmp = 2^(15-envelope_e).  ldexp() is exact for a power of two and,
       unlike an integer shift 1 << (15-envelope_e), remains well defined
       when the exponent difference is negative or not smaller than the
       width of int. */
    tmp = (float)ldexp(1.0, 15 - envelope_e);

    scale = (float)sqrt((L_frame * tmp*tmp*4.0f) / scale);

    return scale;
}
/*-------------------------------------------------------------------*
 * MinHeapify_i()
 *
 * Sift the entry in slot i downwards: as long as one of its two
 * children has a smaller score, swap it with the smallest of the
 * three and continue from the child's slot.
 *
 * Both child slots (2*i+1, 2*i+2) are read without a bounds check,
 * so the caller must provide valid scores in those slots.
 *-------------------------------------------------------------------*/
static void MinHeapify_i(Heap *H, int i)
{
    HeapElem *slot = H->mElem;
    HeapElem moved;
    int child, smallest;

    for (;;)
    {
        /* lowest score among the node and its two children */
        child = 2*i + 1;
        smallest = i;
        if (slot[child].mScore < slot[smallest].mScore)
        {
            smallest = child;
        }
        if (slot[child + 1].mScore < slot[smallest].mScore)
        {
            smallest = child + 1;
        }

        if (smallest == i)
        {
            /* heap property holds at this node */
            break;
        }

        /* exchange the node with its smaller child and descend */
        moved = slot[i];
        slot[i] = slot[smallest];
        slot[smallest] = moved;
        i = smallest;
    }

    return;
}
/*-------------------------------------------------------------------*
* tcx_arith_find_max_scale()
*
*
*-------------------------------------------------------------------*/
static float tcx_arith_find_max_scale(
const float abs_spectrum[], /* i: absolute MDCT coefficients */
int L_frame, /* i: number of spectral lines */
const Word16 envelope[], /* i: scaled envelope (Q15-e) */
Word16 envelope_e, /* i: scaled envelope exponent (Q0) */
const Word16 exps[], /* i: expfp(-(int)envelope[]/2) */
float deadzone /* i: deadzone (0.5f = no deadzone) */
)
{
int i, k, q;
float p, scale;
Heap heap;
Word16 tmpi1, tmpi2;
float envelope_scale;
const float limit = -9.70406052784f; /* = ln(1/16384): log of smallest allowed probability */
/* Find the top most offending lines according to probability estimates */
heap.mSize = kMaxNumHeapElems;
heap.mElem[0].mScore = 0; /* mal: just to silnce the compiler */
for (i=0; i<kMaxNumHeapElems; ++i)
{
heap.mElem[i].mIndex = 0;
heap.mElem[i].mScore = 0;
}
for (; i<2*kMaxNumHeapElems+1; ++i)
{
heap.mElem[i].mScore = FLT_MAX;
}
for (k=0; k<L_frame; ++k)
{
p = envelope[k] * abs_spectrum[k];
if (p > heap.mElem[0].mScore)
{
heap.mElem[0].mScore = p;
heap.mElem[0].mIndex = k;
MinHeapify_i(&heap, 0);
}
}
/* Make sure the scale is limited so that the offending lines don't cause probability underflow. */
/* Also limit scale to avoiding saturation of the gain quantizer */
scale = 1.0f/(float)sqrt(L_frame*0.5f);
envelope_scale = -(float)pow(2, envelope_e-16);
for (i=0; i<heap.mSize; ++i)
{
k = heap.mElem[i].mIndex;
/* Get approximate maximum allowed magnitude */
q = (int)ceil(((limit - log(1.0f - (exps[k]/32768.0) * (exps[k]/32768.0))) / (envelope[k]*envelope_scale) - 1) / 2.0f);
/* Refinement: get the exact q */
powfp_odd2(exps[k], q, &tmpi1, &tmpi2);
if (tmpi1 - tmpi2 >= 2)
{
/* q may be too low */
powfp_odd2(exps[k], q+1, &tmpi1, &tmpi2);
while (tmpi1 - tmpi2 >= 2)
{
++q;
powfp_odd2(exps[k], q+1, &tmpi1, &tmpi2);
}
}
else
{
/* q is too high */
--q;
powfp_odd2(exps[k], q, &tmpi1, &tmpi2);
while (tmpi1 - tmpi2 < 2)
{
--q;
powfp_odd2(exps[k], q, &tmpi1, &tmpi2);
}
}
/* Find the largest scale so that the quantized magnitude is at most q */
p = (q+0.99f-deadzone)/(abs_spectrum[k] + 0.000001f);
assert((int)(abs_spectrum[k] * p + deadzone) <= q);
scale = min(scale, p);
}
return scale;
}
/*-------------------------------------------------------------------*
 * tcx_arith_find_kMax()
 *
 * Scan the spectrum from the top line downwards and return the first
 * line whose scaled magnitude is not below the quantization
 * threshold (1 - deadzone) + deadzone * deadzone_flags[k], i.e.
 * 1 - deadzone for a flag of 0 and 1 for a flag of 1.
 *-------------------------------------------------------------------*/
static int tcx_arith_find_kMax(      /* o: index of highest freq. nonzero line (-1 if all zeros) */
    const float abs_spectrum[],      /* i: absolute MDCT coefficients    */
    int L_frame,                     /* i: number of spectral lines      */
    float scale,                     /* i: scalar quantizer scale        */
    float deadzone,                  /* i: deadzone (0.5f = no deadzone) */
    const int deadzone_flags[]       /* i: line-wise deadzone control    */
)
{
    int line;

    for (line = L_frame - 1; line >= 0; --line)
    {
        /* stop at the first line that is not below its threshold */
        if (!(abs_spectrum[line] * scale < (1.0f - deadzone) + deadzone * deadzone_flags[line]))
        {
            break;
        }
    }

    /* -1 when every line is below its threshold */
    return line;
}
/*-------------------------------------------------------------------*
* tcx_arith_rateloop()
*
*
*-------------------------------------------------------------------*/
static float tcx_arith_rateloop( /* o: best scale */
const float abs_spectrum[], /* i: absolute MDCT coefficients */
int L_frame, /* i: number of spectral lines */
const Word16 envelope[], /* i: scaled envelope (Q15-e) */
Word16 envelope_e, /* i: scaled envelope exponent (Q0) */
const Word16 exps[], /* i: expfp(-(int)envelope[]/2) */
int target_bits, /* i: target bit budget */
float deadzone, /* i: deadzone (0.5f = no deadzone) */
const int deadzone_flags[], /* i: line-wise deadzone control */
float *target_bits_fac /* i/o: scale estimator compensation */
)
{
int k, kMax, q;
float s, adjust;
float fixed_bits[2][N_MAX_ARI];
float estimator_undershoot;
float max_complexity;
int iter; /* rate loop iteration counter */
float scale; /* SQ scale factor to try next */
float scale_best; /* best SQ scale factor */
float scale_max; /* maximum allowable scale factor */
float lob; /* lower bound of SQ scale factor */
float hib; /* upper bound of SQ scale factor */
int flag; /* 1:bit surplus, -1:bit deficit, 0:unknown */
float complexity; /* cumulative rate loop complexity */
float bits; /* number of bits (approximate) */
float envelope_scale;
scale = tcx_arith_estimate_scale(abs_spectrum, L_frame, envelope, envelope_e);
scale *= *target_bits_fac;
scale_max = tcx_arith_find_max_scale(abs_spectrum, L_frame, envelope, envelope_e, exps, deadzone);
if (scale > scale_max) scale = scale_max;
scale_best = scale;
lob = 0.0f;
hib = 0.0f;
flag = 0;
complexity = 0;
bits = 0;
max_complexity = 96.0f * L_frame;
iter = 0;
envelope_scale = (float)pow(2, envelope_e-15);
estimator_undershoot = 0;
/* Precalculate fixed bit costs */
for (k=0; k<L_frame; ++k)
{
s = envelope[k] * envelope_scale;
fixed_bits[0][k] = -log2_f(1-exps[k]/32768.0f);
fixed_bits[1][k] = 1-s*0.5f*LOG2_E - log2_f(1-(exps[k]/32768.0f)*(exps[k]/32768.0f));
}
while (complexity + 48 + L_frame * 11 < max_complexity)
{
kMax = tcx_arith_find_kMax( abs_spectrum, L_frame, scale, deadzone, deadzone_flags );
complexity += 16 + (L_frame - kMax) * 5 + (kMax + 1) * 2;
bits = estimator_undershoot * kMax + 1;
for (k=0; k<=kMax; ++k)
{
s = envelope[k] * envelope_scale;
q = (int)(abs_spectrum[k] * scale + deadzone);
bits += fixed_bits[min(1,q)][k];
bits += s*q*LOG2_E;
}
complexity += 32 + 6*kMax;
if (iter == 0)
{
/* First rate loop iteration */
if (scale < scale_max)
{
/* Only update in non-degenerate case */
/* Update estimator temporal compensation factor */
*target_bits_fac *= target_bits / (float)bits;
if (*target_bits_fac > 1.25f) *target_bits_fac = 1.25f;
if (*target_bits_fac < 0.75f) *target_bits_fac = 0.75f;
}
}
if (bits <= target_bits)
{
/* Bits leftover => scale is too small */
if (flag <= 0 || scale >= scale_best)
{
scale_best = scale;
flag = 1;
}
lob = scale;
if (hib > 0)
{
/* Bisection search */
scale = (lob + hib)*0.5f;
}
else
{
/* Initial scale adaptation */
adjust = 1.25f * target_bits / (float)bits;
if (adjust > 2.0f) adjust = 2.0f;
scale *= adjust;
if (scale > scale_max) scale = scale_max;
}
}
else
{
/* Ran out of bits => scale is too large */
hib = scale;
if (lob > 0)
{
/* Bisection search */
scale = (lob + hib)*0.5f;
}
else
{
/* Initial scale adaptation */
adjust = 0.8f * target_bits / (float)bits;
if (adjust < 0.5f) adjust = 0.5f;
scale *= adjust;
}
if (flag <= 0)
{
scale_best = scale;
flag = 0;
}
}
++iter;
}
return scale_best;
}
/*-------------------------------------------------------------------*
* tcx_arith_encode()
*
* Arithmetic-code the quantized spectrum: magnitude (and sign, for
* nonzero magnitudes) of lines 0..kMax, followed by zero symbols for
* the remaining lines up to L_frame. After every symbol the coder
* state is checked; when it signals that the budget is exhausted
* (as.high <= as.low) the state is rolled back to the last symbol
* that fitted and the remaining lines are set to zero.
* q_abs_spectrum[] is modified in place to reflect what was coded.
*-------------------------------------------------------------------*/
static int tcx_arith_encode( /* o: number of bits consumed */
int q_abs_spectrum[], /* i/o: scalar quantized absolute spectrum */
const int signs[], /* i: signs */
int kMax, /* i: index of the last line to code with magnitude/sign (lines 0..kMax) */
int L_frame, /* i: nominal number of spectral lines */
const Word16 exps[], /* i: expfp(-(int)envelope[]/2) */
int target_bits, /* i: target bit budget */
int prm[] /* o: bit-stream */
)
{
/* 'as' is the working coder state, 'as_lastgood' / 'bp_lastgood' are the */
/* state and bit position after the last symbol that fitted the budget */
Tastat as, as_lastgood;
int bp, bp_lastgood;
int k;
int kEncoded;
Word16 tmpi1, tmpi2;
/* Final coding */
ari_start_encoding_14bits(&as);
ari_copy_states(&as, &as_lastgood);
bp = bp_lastgood = 0;
kEncoded = kMax;
for (k=0; k<=kMax; ++k)
{
if (q_abs_spectrum[k] == 0)
{
/* zero magnitude: coded with the range [exps[k]>>1, 16384] */
assert(exps[k] >= 2);
bp = ari_encode_14bits_range(prm, bp, target_bits, &as, exps[k]>>1, 16384);
}
else
{
/* q_abs_spectrum[k] != 0 */
/* lower the magnitude until the two powfp_odd2() outputs differ by at least 2 */
powfp_odd2(exps[k], q_abs_spectrum[k], &tmpi1, &tmpi2);
while (tmpi1 < tmpi2 + 2)
{
--q_abs_spectrum[k];
powfp_odd2(exps[k], q_abs_spectrum[k], &tmpi1, &tmpi2);
}
bp = ari_encode_14bits_range(prm, bp, target_bits, &as, tmpi2>>1, tmpi1>>1);
bp = ari_encode_14bits_sign(prm, bp, target_bits, &as, signs[k]);
}
/* Check bit budget status */
if (as.high <= as.low)
{
/* no bits left */
if (q_abs_spectrum[k] > 1) /* Lower magnitude is still > 0 */
{
/* Restore state */
ari_copy_states(&as_lastgood, &as);
bp = bp_lastgood;
/* Quantize to lower magnitude */
--q_abs_spectrum[k];
/* Retry encoding */
powfp_odd2(exps[k], q_abs_spectrum[k], &tmpi1, &tmpi2);
bp = ari_encode_14bits_range(prm, bp, target_bits, &as, tmpi2>>1, tmpi1>>1);
bp = ari_encode_14bits_sign(prm, bp, target_bits, &as, signs[k]);
if (as.high > as.low) /* Success */
{
/* line k is kept with the reduced magnitude; all later lines become zero */
ari_copy_states(&as, &as_lastgood);
bp_lastgood = bp;
kEncoded = k;
for (++k; k <= kMax; k++)
{
q_abs_spectrum[k] = 0;
}
break;
}
}
/* line k could not be coded: roll back and zero it together with all later lines */
ari_copy_states(&as_lastgood, &as);
bp = bp_lastgood;
kEncoded = k-1;
for (; k <= kMax; k++)
{
q_abs_spectrum[k] = 0;
}
break;
}
else
{
/* symbol fitted: remember this state as the new rollback point */
ari_copy_states(&as, &as_lastgood);
bp_lastgood = bp;
}
}
/* Send zeros until L_frame */
/* kEncoded is preset to L_frame-1 and only lowered if a zero does not fit */
for (k=kEncoded+1, kEncoded=L_frame-1; k<L_frame; ++k)
{
assert(exps[k] >= 2);
bp = ari_encode_14bits_range(prm, bp, target_bits, &as, exps[k]>>1, 16384);
/* Check bit budget status */
if (as.high <= as.low)
{
/* no bits left */
ari_copy_states(&as_lastgood, &as);
bp = bp_lastgood;
kEncoded = k-1;
break;
}
else
{
ari_copy_states(&as, &as_lastgood);
bp_lastgood = bp;
}
}
if (kEncoded == L_frame-1)
{
/* All lines were coded */
/* RESQ bits possibly available */
/* Limit target bits to actually needed bits */
bp = ari_done_cbr_encoding_14bits(prm, bp, bp + 16 + as.vobf, &as);
}
else
{
/* Coding stopped early: finish against the full target budget */
bp = ari_done_cbr_encoding_14bits(prm, bp, target_bits, &as);
}
return bp;
}
/*-------------------------------------------------------------------*
* tcx_arith_encode_envelope()
*
*
*-------------------------------------------------------------------*/
void tcx_arith_encode_envelope(
float spectrum[], /* i/o: MDCT coefficients */
int signs[], /* o: signs (spectrum[.]<0) */
int L_frame, /* i: frame or MDCT length */
int L_spec, /* i: length w/o BW limitation */
Encoder_State *st, /* i/o: coder state */
const Word16 A_ind[], /* i: quantised LPC coefficients */
int target_bits, /* i: number of available bits */
int prm[], /* o: bitstream parameters */
int use_hm, /* i: use HM in current frame? */
int prm_hm[], /* o: HM parameter area */
short tcxltp_pitch, /* i: TCX LTP pitch in FD, -1 if n/a*/
int *arith_bits, /* o: bits used for ari. coding */
int *signaling_bits, /* o: bits used for signaling */
int low_complexity /* i: low-complexity flag */
)
{
Word16 tmp;
Word32 env[N_MAX_ARI]; /* unscaled envelope (Q16) */
Word16 *envelope; /* scaled envelope (Q15-e) */
Word16 envelope_e;
Word16 exponents[N_MAX_ARI]; /* Q15 */
int L_spec_core;
int *q_spectrum;
TCX_config *tcx_cfg;
float scale;
int k, kMax;
float deadzone;
const int *deadzone_flags;
float gamma_w, gamma_uw;
int hm_bits;
assert(L_spec<=N_MAX_ARI);
tcx_cfg = &st->tcx_cfg;
deadzone = tcx_cfg->sq_rounding;
deadzone_flags = st->memQuantZeros;
*signaling_bits = 0;
assert(st->enableTcxLpc);
gamma_w = 1.0f;
gamma_uw = 1.0f/st->gamma;
tcx_arith_render_envelope( A_ind, L_frame, L_spec, FL2WORD16(tcx_cfg->preemph_fac), FL2WORD16(gamma_w), FL2WORD16(0.5f*gamma_uw), env );
for (k=0; k<L_spec; ++k)
{
if(spectrum[k] < 0)
{
spectrum[k]=-spectrum[k];
signs[k]=1;
}
else
{
signs[k]=0;
}
}
if (use_hm)
{
tcx_hm_analyse( spectrum, L_spec, env, target_bits, tcx_cfg->coder_type, prm_hm, tcxltp_pitch, st->tcxltp_gain, &hm_bits );
target_bits -= hm_bits;
*signaling_bits += hm_bits;
}
else
{
prm_hm[0] = 0; /* just to be sure */
hm_bits = 0;
}
L_spec_core = L_spec;
if (st->igf)
{
L_spec_core = min(L_spec_core, st->hIGFEnc.infoStartLine);
}
envelope = (Word16*)env;
tcx_arith_scale_envelope( L_spec, L_spec_core, env, target_bits, low_complexity, envelope, &envelope_e );
tmp = sub(envelope_e, 1);
FOR (k = 0; k < L_spec; k++)
{
exponents[k] = expfp(negate(envelope[k]), tmp);
}
scale = tcx_arith_rateloop( spectrum, L_spec, envelope, envelope_e, exponents, target_bits, deadzone, deadzone_flags, &st->LPDmem.tcx_target_bits_fac );
/* Final quantization */
kMax = tcx_arith_find_kMax( spectrum, L_spec, scale, deadzone, deadzone_flags );
q_spectrum = (int*)env; /* Reuse buffer */
for (k=0; k<=kMax; ++k)
{
/* quantise using dead-zone */
q_spectrum[k] = (int)(spectrum[k] * scale + deadzone);
}
/* Final encoding */
*arith_bits = tcx_arith_encode( q_spectrum, signs, kMax, L_spec, exponents, target_bits, prm );
/* Multiply back the signs */
for (k=0; k<=kMax; ++k)
{
spectrum[k] = (float)(q_spectrum[k] * (1-2*signs[k]));
}
for (; k<max(L_frame, L_spec); ++k)
{
spectrum[k] = 0;
}
return;
}
} // end of namespace