- upgrade to opus 1.5

This commit is contained in:
Dmytro Bogovych 2024-03-13 11:28:16 +03:00
parent 37471d56ff
commit 62d72fda5c
86 changed files with 542531 additions and 0 deletions

View File

@ -0,0 +1,101 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <immintrin.h>
#include "x86cpu.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
/* Like the "regular" xcorr_kernel(), but computes 8 results at a time.
   For k = 0..7 this stores sum[k] = sum over j in [0, len) of x[j]*y[j+k].
   All loads are unaligned. The lag-7 accumulator reads y up to index
   len+6, so y must have at least len+7 readable floats. */
static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
{
/* One 8-lane accumulator per lag; lanes hold partial sums that are
   reduced horizontally at the end. */
__m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
int i;
__m256 x0;
/* Compute 8 inner products using partial sums. */
for (i=0;i<len-7;i+=8)
{
x0 = _mm256_loadu_ps(x+i);
xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i ), xsum0);
xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
}
/* Tail: 1 to 7 samples remain when len is not a multiple of 8. */
if (i != len) {
/* Sliding window over 7 "on" (-1) entries followed by 8 "off" (0)
   entries: starting at index 7-(len-i) yields a mask whose first
   (len-i) lanes are enabled and the rest disabled. */
static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
__m256i m;
m = _mm256_loadu_si256((__m256i*)(void*)(mask + 7+i-len));
/* Masked loads only touch the enabled lanes; disabled lanes are zero
   and therefore contribute nothing to the sums. */
x0 = _mm256_maskload_ps(x+i, m);
xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i , m), xsum0);
xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
}
/* 8 horizontal adds. */
/* Compute [0 4] [1 5] [2 6] [3 7] */
/* Each result holds, in its low 128 bits, the low+high halves of lag k
   and, in its high 128 bits, the low+high halves of lag k+4. */
xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
/* Compute [0 1 4 5] [2 3 6 7] */
xsum0 = _mm256_hadd_ps(xsum0, xsum1);
xsum1 = _mm256_hadd_ps(xsum2, xsum3);
/* Compute [0 1 2 3 4 5 6 7] */
xsum0 = _mm256_hadd_ps(xsum0, xsum1);
/* Unaligned store: sum[] has no alignment requirement. */
_mm256_storeu_ps(sum, xsum0);
}
/* Cross-correlation of _x against _y for lags 0..max_pitch-1.
   Lags are handled eight at a time by xcorr_kernel_avx(); whatever is
   left over (fewer than eight lags) goes through celt_inner_prod(). */
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
{
   int lag = 0;
   celt_assert(max_pitch>0);
   (void)arch;
   /* Full groups of 8 lags. */
   while (lag < max_pitch-7)
   {
      xcorr_kernel_avx(_x, _y+lag, xcorr+lag, len);
      lag += 8;
   }
   /* Remaining lags, one at a time. */
   while (lag < max_pitch)
   {
      xcorr[lag] = celt_inner_prod(_x, _y+lag, len, arch);
      lag++;
   }
}
#endif

View File

@ -0,0 +1,47 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* When building with MSVC, define the GCC-style __SSE__, __SSE2__ and
   __SSE4_1__ feature macros whenever the corresponding
   OPUS_X86_MAY_HAVE_* option is enabled and the macro is not already
   defined. Presumably needed because MSVC does not predefine these
   macros itself -- TODO confirm against the compiler documentation. */
#ifdef _MSC_VER
# ifdef OPUS_X86_MAY_HAVE_SSE
# ifndef __SSE__
# define __SSE__
# endif
# endif
# ifdef OPUS_X86_MAY_HAVE_SSE2
# ifndef __SSE2__
# define __SSE2__
# endif
# endif
# ifdef OPUS_X86_MAY_HAVE_SSE4_1
# ifndef __SSE4_1__
# define __SSE4_1__
# endif
# endif
#endif

View File

@ -0,0 +1,88 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "arm/armcpu.h"
#include "nnet.h"
#if defined(OPUS_HAVE_RTCD)
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
/* Run-time dispatch table for compute_linear(): one function pointer per
   arch slot. The first three slots use the generic C implementation; the
   last two are selected through the MAY_HAVE_NEON()/MAY_HAVE_DOTPROD()
   macros (presumably resolving to the _neon/_dotprod variant when that
   variant is compiled in, and to the _c one otherwise -- TODO confirm).
   Slots beyond the five initializers, if any, are implicitly NULL.
   NOTE(review): this definition is only compiled when
   OPUS_ARM_MAY_HAVE_DOTPROD is defined (see the enclosing #if); check
   that every configuration that references the table also defines it. */
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
) = {
compute_linear_c, /* default */
compute_linear_c,
compute_linear_c,
MAY_HAVE_NEON(compute_linear), /* neon */
MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
};
#endif
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
/* Run-time dispatch table for compute_activation(): one function pointer
   per arch slot. The first three slots use the generic C implementation;
   the neon and dotprod slots are selected through MAY_HAVE_NEON() and
   MAY_HAVE_DOTPROD() (presumably falling back to the _c variant when the
   optimized one is not built -- TODO confirm). Slots beyond the five
   initializers, if any, are implicitly NULL. */
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
) = {
compute_activation_c, /* default */
compute_activation_c,
compute_activation_c,
MAY_HAVE_NEON(compute_activation), /* neon */
MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
};
/* Run-time dispatch table for compute_conv2d(): one function pointer per
   arch slot. The first three slots use the generic C implementation; the
   neon and dotprod slots are selected through MAY_HAVE_NEON() and
   MAY_HAVE_DOTPROD() (presumably falling back to the _c variant when the
   optimized one is not built -- TODO confirm). Slots beyond the five
   initializers, if any, are implicitly NULL. */
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
) = {
compute_conv2d_c, /* default */
compute_conv2d_c,
compute_conv2d_c,
MAY_HAVE_NEON(compute_conv2d), /* neon */
MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
};
#endif
#endif

View File

@ -0,0 +1,104 @@
/* Copyright (c) 2011-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DNN_ARM_H
#define DNN_ARM_H
#include "cpu_support.h"
#include "opus_types.h"
/* Arch-specific (NEON and DOTPROD) variants of the DNN primitives. They
   take the same arguments as the generic entry points minus the trailing
   arch selector, which the dispatch macros below consume.
   NOTE(review): LinearLayer and Conv2dLayer are not declared by the two
   headers included above, so this header presumably must be included
   after the one that defines them -- TODO confirm. */
void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
void compute_activation_neon(float *output, const float *input, int N, int activation);
void compute_activation_dotprod(float *output, const float *input, int N, int activation);
void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
/* Select the compute_linear() implementation:
   - DOTPROD presumed at build time: call the dotprod variant directly;
   - NEON intrinsics presumed and DOTPROD not even optional: call the
     neon variant directly;
   - otherwise, with RTCD and at least one optional ARM extension: index
     the DNN_COMPUTE_LINEAR_IMPL table with (arch & OPUS_ARCHMASK).
   In the direct cases arch is evaluated and discarded.
   If none of the branches applies, OVERRIDE_COMPUTE_LINEAR stays
   undefined (presumably leaving the generic implementation in use).
   NOTE(review): this chain tests OPUS_ARM_PRESUME_NEON_INTR while the
   activation/conv2d chain tests OPUS_ARM_PRESUME_NEON -- confirm the
   difference is intentional. */
#if defined(OPUS_ARM_PRESUME_DOTPROD)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
);
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
#endif
/* Select the compute_activation() and compute_conv2d() implementations:
   - NEON presumed at build time: call the neon variants directly (arch
     is evaluated and discarded);
   - otherwise, with RTCD and at least one optional ARM extension: index
     the DNN_COMPUTE_ACTIVATION_IMPL / DNN_COMPUTE_CONV2D_IMPL tables
     with (arch & OPUS_ARCHMASK).
   If neither branch applies, the OVERRIDE_* macros stay undefined
   (presumably leaving the generic implementations in use). */
#if defined(OPUS_ARM_PRESUME_NEON)
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
);
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
);
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
#endif
#endif /* DNN_ARM_H */

View File

@ -0,0 +1,38 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Refuse to build this translation unit unless the compiler is targeting
   the Arm dot-product extension. */
#ifndef __ARM_FEATURE_DOTPROD
#error nnet_dotprod.c is being compiled without DOTPROD enabled
#endif
/* Instantiate the arch-generic code in nnet_arch.h for "dotprod";
   presumably RTCD_ARCH becomes the suffix of the generated function
   names (e.g. compute_linear_dotprod) -- TODO confirm in nnet_arch.h. */
#define RTCD_ARCH dotprod
#include "nnet_arch.h"

View File

@ -0,0 +1,38 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Refuse to build this translation unit unless the compiler is targeting
   Neon (either spelling of the feature macro is accepted). */
#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
#error nnet_neon.c is being compiled without Neon enabled
#endif
/* Instantiate the arch-generic code in nnet_arch.h for "neon";
   presumably RTCD_ARCH becomes the suffix of the generated function
   names (e.g. compute_linear_neon) -- TODO confirm in nnet_arch.h. */
#define RTCD_ARCH neon
#include "nnet_arch.h"

246
src/libs/opus/dnn/burg.c Normal file
View File

@ -0,0 +1,246 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <string.h>
#include <assert.h>
#include "arch.h"
#include "burg.h"
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
#define SILK_MAX_ORDER_LPC 16
#define FIND_LPC_COND_FAC 1e-5f
/* Energy (sum of squared samples) of a float array, accumulated in double.
   Samples are consumed four at a time; each group of four squares is summed
   first and then added to the running total, followed by any leftovers. */
static double silk_energy_FLP(
    const float *data,
    int dataSize
)
{
    double energy = 0.0;
    int pos = 0;
    const int unrolledEnd = dataSize - 3;

    /* groups of four samples */
    while( pos < unrolledEnd ) {
        const float *p = data + pos;
        const double quad = p[ 0 ] * (double)p[ 0 ] +
                            p[ 1 ] * (double)p[ 1 ] +
                            p[ 2 ] * (double)p[ 2 ] +
                            p[ 3 ] * (double)p[ 3 ];
        energy += quad;
        pos += 4;
    }

    /* zero to three trailing samples */
    while( pos < dataSize ) {
        energy += data[ pos ] * (double)data[ pos ];
        pos++;
    }

    assert( energy >= 0.0 );
    return energy;
}
/* Dot product of two float arrays, accumulated in double.
   Elements are consumed four at a time; each group of four products is
   summed first and then added to the running total, followed by leftovers. */
static double silk_inner_product_FLP(
    const float *data1,
    const float *data2,
    int dataSize
)
{
    double acc = 0.0;
    int pos = 0;
    const int unrolledEnd = dataSize - 3;

    /* groups of four element pairs */
    while( pos < unrolledEnd ) {
        const float *a = data1 + pos;
        const float *b = data2 + pos;
        const double quad = a[ 0 ] * (double)b[ 0 ] +
                            a[ 1 ] * (double)b[ 1 ] +
                            a[ 2 ] * (double)b[ 2 ] +
                            a[ 3 ] * (double)b[ 3 ];
        acc += quad;
        pos += 4;
    }

    /* zero to three trailing element pairs */
    while( pos < dataSize ) {
        acc += data1[ pos ] * (double)data2[ pos ];
        pos++;
    }

    return acc;
}
/* Burg-style linear prediction analysis.
   Derives one reflection coefficient per order from the input signal and
   converts them step by step into order-D prediction coefficients, which
   are written (negated, as float) to A[0..D-1]. Correlations are summed
   over nb_subfr subframes of subfr_length samples each. The inverse
   prediction gain is never allowed below minInvGain: when a step would
   cross it, the reflection coefficient is adjusted to hit the limit
   exactly, the remaining coefficients are zeroed and the recursion stops.
   Returns the residual energy, floored at 0.
   NOTE(review): the work arrays are sized by SILK_MAX_ORDER_LPC and D is
   not checked against it here; callers presumably guarantee
   D <= SILK_MAX_ORDER_LPC -- TODO confirm. */
float silk_burg_analysis( /* O returns residual energy */
float A[], /* O prediction coefficients (length order) */
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
const float minInvGain, /* I minimum inverse prediction gain */
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const int nb_subfr, /* I number of subframes stacked in x */
const int D /* I order */
)
{
int k, n, s, reached_max_gain;
double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
const float *x_ptr;
double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
/* Af is deliberately left uninitialized: at order n only Af[0..n-1] are
   read, and those were written by earlier iterations. */
double Af[ SILK_MAX_ORDER_LPC ];
assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */
C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
for( n = 1; n < D + 1; n++ ) {
C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
}
}
memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
/* Initialize */
/* The energy term is inflated by FIND_LPC_COND_FAC * C0 plus a tiny
   constant, which keeps it strictly positive even for an all-zero input. */
CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
invGain = 1.0f;
reached_max_gain = 0;
for( n = 0; n < D; n++ ) {
/* Update first row of correlation matrix (without first element) */
/* Update last row of correlation matrix (without last element, stored in reversed order) */
/* Update C * Af */
/* Update C * flipud(Af) (stored in reversed order) */
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
tmp1 = x_ptr[ n ];
tmp2 = x_ptr[ subfr_length - n - 1 ];
for( k = 0; k < n; k++ ) {
C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
Atmp = Af[ k ];
tmp1 += x_ptr[ n - k - 1 ] * Atmp;
tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
}
for( k = 0; k <= n; k++ ) {
CAf[ k ] -= tmp1 * x_ptr[ n - k ];
CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
}
}
tmp1 = C_first_row[ n ];
tmp2 = C_last_row[ n ];
for( k = 0; k < n; k++ ) {
Atmp = Af[ k ];
tmp1 += C_last_row[ n - k - 1 ] * Atmp;
tmp2 += C_first_row[ n - k - 1 ] * Atmp;
}
CAf[ n + 1 ] = tmp1;
CAb[ n + 1 ] = tmp2;
/* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
num = CAb[ n + 1 ];
nrg_b = CAb[ 0 ];
nrg_f = CAf[ 0 ];
for( k = 0; k < n; k++ ) {
Atmp = Af[ k ];
num += CAb[ n - k ] * Atmp;
nrg_b += CAb[ k + 1 ] * Atmp;
nrg_f += CAf[ k + 1 ] * Atmp;
}
assert( nrg_f > 0.0 );
assert( nrg_b > 0.0 );
/* Calculate the next order reflection (parcor) coefficient */
rc = -2.0 * num / ( nrg_f + nrg_b );
assert( rc > -1.0 && rc < 1.0 );
/* Update inverse prediction gain */
tmp1 = invGain * ( 1.0 - rc * rc );
if( tmp1 <= minInvGain ) {
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
rc = sqrt( 1.0 - minInvGain / invGain );
if( num > 0 ) {
/* Ensure adjusted reflection coefficients has the original sign */
rc = -rc;
}
invGain = minInvGain;
reached_max_gain = 1;
} else {
invGain = tmp1;
}
/* Update the AR coefficients */
/* Symmetric in-place update: pairs (k, n-k-1) are updated together, so
   only the first half of the range needs to be visited. */
for( k = 0; k < (n + 1) >> 1; k++ ) {
tmp1 = Af[ k ];
tmp2 = Af[ n - k - 1 ];
Af[ k ] = tmp1 + rc * tmp2;
Af[ n - k - 1 ] = tmp2 + rc * tmp1;
}
Af[ n ] = rc;
if( reached_max_gain ) {
/* Reached max prediction gain; set remaining coefficients to zero and exit loop */
for( k = n + 1; k < D; k++ ) {
Af[ k ] = 0.0;
}
break;
}
/* Update C * Af and C * Ab */
for( k = 0; k <= n + 1; k++ ) {
tmp1 = CAf[ k ];
CAf[ k ] += rc * CAb[ n - k + 1 ];
CAb[ n - k + 1 ] += rc * tmp1;
}
}
if( reached_max_gain ) {
/* Convert to float */
for( k = 0; k < D; k++ ) {
A[ k ] = (float)( -Af[ k ] );
}
/* Subtract energy of preceding samples from C0 */
for( s = 0; s < nb_subfr; s++ ) {
C0 -= silk_energy_FLP( x + s * subfr_length, D );
}
/* Approximate residual energy */
nrg_f = C0 * invGain;
} else {
/* Compute residual energy and store coefficients as float */
nrg_f = CAf[ 0 ];
tmp1 = 1.0;
for( k = 0; k < D; k++ ) {
Atmp = Af[ k ];
nrg_f += CAf[ k + 1 ] * Atmp;
tmp1 += Atmp * Atmp;
A[ k ] = (float)(-Atmp);
}
/* Remove the FIND_LPC_COND_FAC conditioning term again, scaled by the
   squared norm of the prediction filter (1 + sum of Af[k]^2). */
nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
}
/* Return residual energy */
return MAX32(0, (float)nrg_f);
}

41
src/libs/opus/dnn/burg.h Normal file
View File

@ -0,0 +1,41 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef BURG_H
#define BURG_H
/* Burg-style LPC analysis over nb_subfr stacked subframes.
   Writes D prediction coefficients to A and returns the residual energy;
   the prediction gain is limited through minInvGain. */
float silk_burg_analysis( /* O returns residual energy */
float A[], /* O prediction coefficients (length order) */
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
const float minInvGain, /* I minimum inverse prediction gain */
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const int nb_subfr, /* I number of subframes stacked in x */
const int D /* I order */
);
#endif

View File

@ -0,0 +1,56 @@
#ifndef COMMON_H
#define COMMON_H
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "opus_defines.h"
#define LOG256 5.5451774445f
/* Fast approximation of log2(x) for positive, normal x.
   The IEEE-754 single-precision exponent is extracted from the bit pattern
   and removed so that the remaining value lies in [1, 2); a cubic polynomial
   centred on 1.5 then approximates the log2 of that mantissa.
   Assumes a 32-bit int. Results for x <= 0 are not meaningful. */
static OPUS_INLINE float log2_approx(float x)
{
   int integer;
   float frac;
   union {
      float f;
      int i;
   } in;
   in.f = x;
   /* Unbiased exponent of x. */
   integer = (in.i>>23)-127;
   /* Strip the exponent so in.f falls in [1, 2). The subtraction is done on
      unsigned values: "integer" is negative for every x < 1, and
      left-shifting a negative int is undefined behaviour. */
   in.i = (int)((unsigned)in.i - ((unsigned)integer<<23));
   frac = in.f - 1.5f;
   frac = -0.41445418f + frac*(0.95909232f
         + frac*(-0.33951290f + frac*0.16541097f));
   return 1+integer+frac;
}
#define log_approx(x) (0.69315f*log2_approx(x))
/* Expand a u-law code to a linear sample value.
   The code is re-centred around 128, its sign is split off, and the
   magnitude is passed through the exponential expansion curve before being
   scaled by 32768/255. */
static OPUS_INLINE float ulaw2lin(float u)
{
   const float scale_1 = 32768.f/255.f;
   float sign;
   float mag;
   u = u - 128.f;
   if (u >= 0.f) {
      sign = 1.f;
   } else {
      sign = -1.f;
   }
   mag = fabs(u);
   return sign*scale_1*(exp(mag/128.*LOG256)-1);
}
/* Compress a linear sample value to a u-law code in [0, 255].
   The magnitude is compressed with the approximate logarithm, the sign is
   re-applied, the result is offset by 128, clamped to [0, 255] and rounded
   to the nearest integer. */
static OPUS_INLINE int lin2ulaw(float x)
{
   const float scale = 255.f/32768.f;
   int sign;
   float u;
   if (x >= 0) {
      sign = 1;
   } else {
      sign = -1;
   }
   x = fabs(x);
   u = (sign*(128*log_approx(1+scale*x)/LOG256));
   u = 128 + u;
   /* Clamp to the valid code range before rounding. */
   if (u < 0) {
      u = 0;
   } else if (u > 255) {
      u = 255;
   }
   return (int)floor(.5 + u);
}
#endif

View File

@ -0,0 +1,44 @@
/* Copyright (c) 2022 Amazon
Written by Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include "celt/entenc.h"
#include "os_support.h"
#include "dred_config.h"
#include "dred_coding.h"
/* Quantizer level for position i.
   Starts at q0 and grows with i at a rate chosen by dQ (an index into an
   8-entry slope table, in sixteenths per step, rounded to nearest); the
   result is capped at qmax. dQ must be in [0, 7]. */
int compute_quantizer(int q0, int dQ, int qmax, int i) {
   static const int dQ_table[8] = {0, 2, 3, 4, 6, 8, 12, 16};
   const int slope = dQ_table[dQ];
   const int level = q0 + (slope*i + 8)/16;
   if (level > qmax) {
      return qmax;
   }
   return level;
}

View File

@ -0,0 +1,36 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_CODING_H
#define DRED_CODING_H
#include "opus_types.h"
#include "entcode.h"
/* Quantizer level for position i: starts at q0, increases with i at a
   rate selected by dQ, and is capped at qmax. */
int compute_quantizer(int q0, int dQ, int qmax, int i);
#endif

View File

@ -0,0 +1,54 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_CONFIG_H
#define DRED_CONFIG_H
/* Change this once DRED gets an extension number assigned. */
#define DRED_EXTENSION_ID 126
/* Remove these two completely once DRED gets an extension number assigned. */
#define DRED_EXPERIMENTAL_VERSION 10
#define DRED_EXPERIMENTAL_BYTES 2
#define DRED_MIN_BYTES 8
/* These are in part duplicates of the values defined in dred_rdovae_constants.h */
#define DRED_SILK_ENCODER_DELAY (79+12-80) /* evaluates to 11 */
#define DRED_FRAME_SIZE 160
#define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE)) /* evaluates to 320 */
#define DRED_MAX_DATA_SIZE 1000
#define DRED_ENC_Q0 6
#define DRED_ENC_Q1 15
/* Covers 1.04 second so we can cover one second, after the lookahead. */
#define DRED_MAX_LATENTS 26
#define DRED_NUM_REDUNDANCY_FRAMES (2*DRED_MAX_LATENTS) /* evaluates to 52 */
#define DRED_MAX_FRAMES (4*DRED_MAX_LATENTS) /* evaluates to 104 */
#endif

View File

@ -0,0 +1,129 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "os_support.h"
#include "dred_decoder.h"
#include "dred_coding.h"
#include "celt/entdec.h"
#include "celt/laplace.h"
#include "dred_rdovae_stats_data.h"
#include "dred_rdovae_constants.h"
/* Entropy-decode `dim` quantized values from `dec` and dequantize them into `x`.
   scale, r and p0 are per-dimension tables (one opus_uint8 entry per value).
   A dimension with r == 0 or p0 == 255 consumes no bits and decodes as 0.
   A zero scale is treated as 1 so the division is always defined. */
static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
    int k;
    for (k = 0; k < dim; k++) {
        int level = 0;
        int divisor;
        if (r[k] != 0 && p0[k] != 255) {
            level = ec_laplace_decode_p0(dec, p0[k]<<7, r[k]<<7);
        }
        divisor = (scale[k] == 0) ? 1 : scale[k];
        x[k] = level*256.f/divisor;
    }
}
/* Entropy-decode a DRED payload into `dec`.

   dec                 receives the decoded initial state, the latent vectors,
                       dred_offset, nb_latents, and has process_stage set to 1.
   bytes, num_bytes    the DRED payload handed to the range decoder.
   min_feature_frames  bounds the work: chunks are decoded while
                       i < IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2),
                       with i advancing by 2 per chunk.
   dred_frame_offset   added to the offset decoded from the payload.

   Returns the number of latent vectors decoded (also stored in dec->nb_latents). */
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset)
{
    ec_dec ec;
    int q_level;
    int i;
    int offset;
    int q0;
    int dQ;
    int qmax;
    int state_qoffset;
    int extra_offset;

    /* since features are decoded in quadruples, it makes no sense to go with an uneven number of redundancy frames */
    celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);

    /* decode initial state and initialize RDOVAE decoder */
    ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
    q0 = ec_dec_uint(&ec, 16);
    dQ = ec_dec_uint(&ec, 8);
    /* Optional extension of the offset, coded in multiples of 32. */
    if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
    else extra_offset = 0;
    /* Compute total offset, including DRED position in a multiframe packet. */
    dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
    /*printf("%d %d %d\n", dred_offset, q0, dQ);*/
    qmax = 15;
    if (q0 < 14 && dQ > 0) {
        int nvals;
        int ft;
        int s;
        /* The distribution for the dQmax symbol is split evenly between zero
           (which implies qmax == 15) and larger values, with the probability of
           all larger values being uniform.
           This is equivalent to coding 1 bit to decide if the maximum is less than
           15 followed by a uint to decide the actual value if it is less than
           15, but combined into a single symbol. */
        nvals = 15 - (q0 + 1);
        ft = 2*nvals;
        s = ec_decode(&ec, ft);
        if (s >= nvals) {
            qmax = q0 + (s - nvals) + 1;
            ec_dec_update(&ec, s, s + 1, ft);
        }
        else {
            ec_dec_update(&ec, 0, nvals, ft);
        }
    }

    /* The initial state uses the tables for quantizer level q0. */
    state_qoffset = q0*DRED_STATE_DIM;
    dred_decode_latents(
        &ec,
        dec->state,
        dred_state_quant_scales_q8 + state_qoffset,
        dred_state_r_q8 + state_qoffset,
        dred_state_p0_q8 + state_qoffset,
        DRED_STATE_DIM);

    /* decode newest to oldest and store oldest to newest */
    for (i = 0; i < IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2); i += 2)
    {
        /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
        if (8*num_bytes - ec_tell(&ec) <= 7)
            break;
        /* Each chunk selects its own quantizer level and the matching table rows. */
        q_level = compute_quantizer(q0, dQ, qmax, i/2);
        offset = q_level*DRED_LATENT_DIM;
        dred_decode_latents(
            &ec,
            &dec->latents[(i/2)*DRED_LATENT_DIM],
            dred_latent_quant_scales_q8 + offset,
            dred_latent_r_q8 + offset,
            dred_latent_p0_q8 + offset,
            DRED_LATENT_DIM
            );
    }
    dec->process_stage = 1;
    dec->nb_latents = i/2;
    return i/2;
}

View File

@ -0,0 +1,49 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_DECODER_H
#define DRED_DECODER_H
#include "opus.h"
#include "dred_config.h"
#include "dred_rdovae.h"
#include "entcode.h"
#include "dred_rdovae_constants.h"
/* Decoder-side DRED container; state, latents, nb_latents, process_stage and
   dred_offset are filled in by dred_ec_decode(). */
struct OpusDRED {
/* Feature storage; not written by dred_ec_decode() -- presumably filled by a
   later processing stage (TODO confirm against the caller). */
float fec_features[2*DRED_NUM_REDUNDANCY_FRAMES*DRED_NUM_FEATURES];
/* Decoded initial state (DRED_STATE_DIM values). */
float state[DRED_STATE_DIM];
/* Decoded latent vectors, DRED_LATENT_DIM values each, stored consecutively. */
float latents[(DRED_NUM_REDUNDANCY_FRAMES/2)*DRED_LATENT_DIM];
/* Number of latent vectors decoded by the last dred_ec_decode() call. */
int nb_latents;
/* Set to 1 by dred_ec_decode() once entropy decoding is done. */
int process_stage;
/* Offset decoded from the payload plus the caller-supplied frame offset. */
int dred_offset;
};
/* Entropy-decodes a DRED payload of num_bytes bytes into dec and returns the
   number of latent vectors decoded. */
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset);
#endif

View File

@ -0,0 +1,363 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#if 0
#include <stdio.h>
#include <math.h>
#endif
#include "dred_encoder.h"
#include "dred_coding.h"
#include "celt/entenc.h"
#include "dred_decoder.h"
#include "float_cast.h"
#include "os_support.h"
#include "celt/laplace.h"
#include "dred_rdovae_stats_data.h"
/* Zero the whole RDOVAE encoder state. */
static void DRED_rdovae_init_encoder(RDOVAEEncState *enc_state)
{
    memset(enc_state, 0, sizeof *enc_state);
}
/* Load the DRED encoder weights from a binary blob.

   enc   encoder whose RDOVAE model and LPCNet encoder state get initialized.
   data  weight blob of `len` bytes.

   Returns OPUS_OK and sets enc->loaded when both the RDOVAE encoder model and
   the LPCNet encoder model initialize; returns OPUS_BAD_ARG otherwise. */
int dred_encoder_load_model(DREDEnc* enc, const void *data, int len)
{
    WeightArray *list = NULL;
    int ret;
    parse_weights(&list, data, len);
    /* Without a weight list there is nothing to initialize from; bail out
       instead of handing an unset/NULL pointer to init_rdovaeenc(). */
    if (list == NULL) return OPUS_BAD_ARG;
    ret = init_rdovaeenc(&enc->model, list);
    opus_free(list);
    if (ret == 0) {
        ret = lpcnet_encoder_load_model(&enc->lpcnet_enc_state, data, len);
    }
    if (ret == 0) enc->loaded = 1;
    return (ret == 0) ? OPUS_OK : OPUS_BAD_ARG;
}
/* Reset the running encoder state while keeping the loaded model and the
   Fs/channels/loaded configuration: everything from DREDENC_RESET_START to the
   end of the struct is zeroed, then the LPCNet and RDOVAE encoder states are
   re-initialized. */
void dred_encoder_reset(DREDEnc* enc)
{
    char *reset_start = (char*)&enc->DREDENC_RESET_START;

    OPUS_CLEAR(reset_start, sizeof(DREDEnc) - (reset_start - (char*)enc));
    /* The input buffer starts pre-filled by the SILK encoder delay. */
    enc->input_buffer_fill = DRED_SILK_ENCODER_DELAY;
    lpcnet_encoder_init(&enc->lpcnet_enc_state);
    DRED_rdovae_init_encoder(&enc->rdovae_enc);
}
/* Initialize a DRED encoder for sampling rate Fs and the given channel count.
   Unless USE_WEIGHTS_FILE is defined, the built-in weights are loaded and
   enc->loaded reflects whether that succeeded; otherwise enc->loaded is 0. */
void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels)
{
    enc->Fs = Fs;
    enc->channels = channels;
#ifndef USE_WEIGHTS_FILE
    enc->loaded = (init_rdovaeenc(&enc->model, rdovaeenc_arrays) == 0) ? 1 : 0;
#else
    enc->loaded = 0;
#endif
    dred_encoder_reset(enc);
}
/* Consume one double frame (2*DRED_FRAME_SIZE samples) from enc->input_buffer:
   compute the LPCNet features of both halves, run the RDOVAE encoder on them
   and push the resulting latent/state vectors at the front of the history
   buffers. Requires a loaded model. */
static void dred_process_frame(DREDEnc *enc, int arch)
{
    float feature_buffer[2 * 36];
    float input_buffer[2*DRED_NUM_FEATURES] = {0};
    float *frame0_features = feature_buffer;
    float *frame1_features = feature_buffer + 36;

    celt_assert(enc->loaded);

    /* Make room at index 0 by shifting both histories one entry down. */
    OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM);
    OPUS_MOVE(enc->state_buffer + DRED_STATE_DIM, enc->state_buffer, (DRED_MAX_FRAMES - 1) * DRED_STATE_DIM);

    /* LPCNet features, 36 floats per frame. */
    lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, frame0_features, arch);
    lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, frame1_features, arch);

    /* Only the first DRED_NUM_FEATURES of each frame feed the RDOVAE
       (the LPC coefficients are discarded). */
    OPUS_COPY(input_buffer, frame0_features, DRED_NUM_FEATURES);
    OPUS_COPY(input_buffer + DRED_NUM_FEATURES, frame1_features, DRED_NUM_FEATURES);

    dred_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer, arch);

    /* The fill count saturates at DRED_NUM_REDUNDANCY_FRAMES. */
    enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
}
/* IIR filter in transposed direct-form II.

   in, out  input and output signals of `len` samples (may be the same buffer:
            each input sample is read before its output is written).
   b0       feed-forward gain applied directly to the current input sample.
   b, a     `order` feed-forward / feedback coefficients used in the state update.
   mem      filter memory, updated in place; mem[order] is read, so it must
            hold at least order+1 values. */
void filter_df2t(const float *in, float *out, int len, float b0, const float *b, const float *a, int order, float *mem)
{
    int n;
    for (n = 0; n < len; n++) {
        const float x = in[n];
        const float y = x*b0 + mem[0];
        const float neg_y = -y;
        int k = 0;
        /* Shift the delay line while folding in the current input/output. */
        while (k < order) {
            mem[k] = mem[k+1] + b[k]*x + a[k]*neg_y;
            k++;
        }
        out[n] = y;
    }
}
#define MAX_DOWNMIX_BUFFER (960*2)
/* Downmix `in` to mono and resample it from enc->Fs to 16 kHz.

   enc      supplies Fs, channels and the resampling filter memory.
   in       in_len samples per channel (interleaved when channels != 1).
   out      receives out_len samples at 16 kHz.

   The signal is zero-stuffed by `up` into a scratch buffer, low-pass filtered
   with filter_df2t() and, for 12/24/48 kHz, decimated by 3. 16 kHz input is
   copied through after the downmix. */
static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float *out, int out_len)
{
    float downmix[MAX_DOWNMIX_BUFFER];
    int i;
    int up;
    celt_assert(enc->channels*in_len <= MAX_DOWNMIX_BUFFER);
    celt_assert(in_len * (opus_int32)16000 == out_len * enc->Fs);
    /* Zero-stuffing factor for each supported rate. */
    switch(enc->Fs) {
        case 8000:
            up = 2;
            break;
        case 12000:
            up = 4;
            break;
        case 16000:
            up = 1;
            break;
        case 24000:
            up = 2;
            break;
        case 48000:
            up = 1;
            break;
        default:
            celt_assert(0);
            /* Keep `up` defined when assertions are compiled out. */
            up = 1;
            break;
    }
    /* The scratch buffer receives up*in_len samples, so that is the bound that matters. */
    celt_assert(up*in_len <= MAX_DOWNMIX_BUFFER);
    OPUS_CLEAR(downmix, up*in_len);
    if (enc->channels == 1) {
        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(up*in[i]);
    } else {
        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(.5*up*(in[2*i]+in[2*i+1]));
    }
    if (enc->Fs == 16000) {
        OPUS_COPY(out, downmix, out_len);
    } else if (enc->Fs == 48000 || enc->Fs == 24000) {
        /* ellip(7, .2, 70, 7750/24000) */
        static const float filter_b[8] = { 0.005873358047f, 0.012980854831f, 0.014531340042f, 0.014531340042f, 0.012980854831f, 0.005873358047f, 0.004523418224f, 0.f};
        static const float filter_a[8] = {-3.878718597768f, 7.748834257468f, -9.653651699533f, 8.007342726666f, -4.379450178552f, 1.463182111810f, -0.231720677804f, 0.f};
        float b0 = 0.004523418224f;
        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
        /* Decimate by 3. */
        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
    } else if (enc->Fs == 12000) {
        /* ellip(7, .2, 70, 7750/24000) */
        /* NOTE(review): the design comment above is identical to the 48/24 kHz
           branch although the coefficients differ -- confirm the actual spec. */
        static const float filter_b[8] = {-0.001017101081f, 0.003673127243f, 0.001009165267f, 0.001009165267f, 0.003673127243f, -0.001017101081f, 0.002033596776f, 0.f};
        static const float filter_a[8] = {-4.930414411612f, 11.291643096504f, -15.322037343815f, 13.216403930898f, -7.220409219553f, 2.310550142771f, -0.334338618782f, 0.f};
        float b0 = 0.002033596776f;
        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
        /* Decimate by 3. */
        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
    } else if (enc->Fs == 8000) {
        /* ellip(7, .2, 70, 3900/8000) */
        static const float filter_b[8] = { 0.081670120929f, 0.180401598565f, 0.259391051971f, 0.259391051971f, 0.180401598565f, 0.081670120929f, 0.020109185709f, 0.f};
        static const float filter_a[8] = {-1.393651933659f, 2.609789872676f, -2.403541968806f, 2.056814957331f, -1.148908574570f, 0.473001413788f, -0.110359852412f, 0.f};
        float b0 = 0.020109185709f;
        /* Pure interpolation: the filter writes straight into `out`. */
        filter_df2t(downmix, out, out_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
    } else {
        celt_assert(0);
    }
}
/* Feed `frame_size` samples of PCM (at enc->Fs) to the DRED encoder.

   The input is converted to 16 kHz in pieces of at most 2*DRED_FRAME_SIZE
   samples and appended to enc->input_buffer; each time a full double frame is
   available it is run through dred_process_frame(). enc->dred_offset and
   enc->latent_offset are recomputed on every call. Requires a loaded model. */
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch)
{
    int curr_offset16k;
    int remaining16k = frame_size * 16000 / enc->Fs;
    int chunk16k = 0;
    int chunk = 0;

    celt_assert(enc->loaded);
    curr_offset16k = 40 + extra_delay*16000/enc->Fs - enc->input_buffer_fill;
    /* Offset in units of 40 samples at 16 kHz, rounded to nearest. */
    enc->dred_offset = (int)floor((curr_offset16k+20.f)/40.f);
    enc->latent_offset = 0;

    for (; remaining16k > 0; pcm += chunk, remaining16k -= chunk16k) {
        chunk16k = IMIN(2*DRED_FRAME_SIZE, remaining16k);
        chunk = chunk16k * enc->Fs / 16000;
        dred_convert_to_16k(enc, pcm, chunk, &enc->input_buffer[enc->input_buffer_fill], chunk16k);
        enc->input_buffer_fill += chunk16k;

        /* Not enough for a double frame yet: keep accumulating. */
        if (enc->input_buffer_fill < 2*DRED_FRAME_SIZE)
            continue;

        curr_offset16k += 320;
        dred_process_frame(enc, arch);
        /* Drop the consumed double frame and move the leftover to the front. */
        enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
        OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
        /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */
        if (enc->dred_offset < 6) {
            enc->dred_offset += 8;
        } else {
            enc->latent_offset++;
        }
    }
}
/* Quantize `dim` values of `x` and entropy-code them with `enc`.

   scale, dzone, r and p0 are per-dimension opus_uint8 tables: scale and dzone
   are divided by 256 before use, r and p0 are shifted left by 7 and passed to
   the Laplace coder. Dimensions with r == 0 or p0 == 255 emit no bits. */
static void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint8 *scale, const opus_uint8 *dzone, const opus_uint8 *r, const opus_uint8 *p0, int dim, int arch) {
    int k;
    int level[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float scaled[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float dz_width[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float dz_gain[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float eps = .1f;

    /* This is split into multiple loops (with temporary arrays) so that the compiler
       can vectorize all of it, and so we can call the vector tanh(). */
    for (k = 0; k < dim; k++) {
        dz_width[k] = dzone[k]*(1.f/256.f);
        scaled[k] = x[k]*scale[k]*(1.f/256.f);
        dz_gain[k] = scaled[k]/(dz_width[k]+eps);
    }
    compute_activation(dz_gain, dz_gain, dim, ACTIVATION_TANH, arch);
    /* Soft dead zone: pull the value towards zero, then round to nearest. */
    for (k = 0; k < dim; k++) {
        scaled[k] -= dz_width[k]*dz_gain[k];
        level[k] = (int)floor(.5f+scaled[k]);
    }
    for (k = 0; k < dim; k++) {
        if (r[k] != 0 && p0[k] != 255) {
            ec_laplace_encode_p0(enc, level[k], p0[k]<<7, r[k]<<7);
        } else {
            /* Make the impossible actually impossible. */
            level[k] = 0;
        }
    }
}
/* Returns 1 if any of the 16 activity flags starting at index 8*offset in
   activity_mem equals 1, and 0 otherwise. */
static int dred_voice_active(const unsigned char *activity_mem, int offset) {
    return memchr(activity_mem + 8*offset, 1, 16) != NULL;
}
/* Entropy-code one DRED payload into `buf`.

   The payload consists of a header (q0, dQ, offset, optional qmax symbol), the
   quantized initial state, and up to max_chunks latent vectors taken from the
   encoder's history buffers.

   enc           encoder holding the latent/state history and offsets.
   buf           output buffer of max_bytes bytes.
   max_chunks    upper bound on the number of latent vectors coded.
   q0, dQ, qmax  quantizer parameters; qmax >= q0 is asserted.
   activity_mem  voice-activity flags, read through dred_voice_active().

   Returns the number of bytes used in `buf`, or 0 when no DRED data is sent. */
int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch) {
ec_enc ec_encoder;
int q_level;
int i;
int offset;
int ec_buffer_fill;
int state_qoffset;
ec_enc ec_bak;
int prev_active=0;
int latent_offset;
int extra_dred_offset=0;
int dred_encoded=0;
int delayed_dred=0;
int total_offset;
latent_offset = enc->latent_offset;
/* Delaying new DRED data when just out of silence because we already have the
main Opus payload for that frame. */
if (activity_mem[0] && enc->last_extra_dred_offset>0) {
latent_offset = enc->last_extra_dred_offset;
delayed_dred = 1;
enc->last_extra_dred_offset = 0;
}
/* Skip over leading entries without voice activity, counting how many were skipped. */
while (latent_offset < enc->latents_buffer_fill && !dred_voice_active(activity_mem, latent_offset)) {
latent_offset++;
extra_dred_offset++;
}
if (!delayed_dred) enc->last_extra_dred_offset = extra_dred_offset;
/* entropy coding of state and latents */
ec_enc_init(&ec_encoder, buf, max_bytes);
ec_enc_uint(&ec_encoder, q0, 16);
ec_enc_uint(&ec_encoder, dQ, 8);
/* Each skipped entry moves the coded offset by 8 units. */
total_offset = 16 - (enc->dred_offset - extra_dred_offset*8);
celt_assert(total_offset>=0);
/* Offsets above 31 are split into a multiple of 32 (flag 1) plus a 5-bit remainder. */
if (total_offset > 31) {
ec_enc_uint(&ec_encoder, 1, 2);
ec_enc_uint(&ec_encoder, total_offset>>5, 256);
ec_enc_uint(&ec_encoder, total_offset&31, 32);
} else {
ec_enc_uint(&ec_encoder, 0, 2);
ec_enc_uint(&ec_encoder, total_offset, 32);
}
celt_assert(qmax >= q0);
/* The qmax symbol is only coded under the same condition the decoder tests. */
if (q0 < 14 && dQ > 0) {
int nvals;
/* If you want to use qmax == q0, you should have set dQ = 0. */
celt_assert(qmax > q0);
nvals = 15 - (q0 + 1);
ec_encode(&ec_encoder, qmax >= 15 ? 0 : nvals + qmax - (q0 + 1),
qmax >= 15 ? nvals : nvals + qmax - q0, 2*nvals);
}
/* The initial state uses the tables for quantizer level q0. */
state_qoffset = q0*DRED_STATE_DIM;
dred_encode_latents(
&ec_encoder,
&enc->state_buffer[latent_offset*DRED_STATE_DIM],
dred_state_quant_scales_q8 + state_qoffset,
dred_state_dead_zone_q8 + state_qoffset,
dred_state_r_q8 + state_qoffset,
dred_state_p0_q8 + state_qoffset,
DRED_STATE_DIM,
arch);
/* Header plus state already exceed the budget: send nothing. */
if (ec_tell(&ec_encoder) > 8*max_bytes) {
return 0;
}
/* ec_bak snapshots the coder after the last chunk worth keeping. */
ec_bak = ec_encoder;
for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-latent_offset-1); i += 2)
{
int active;
q_level = compute_quantizer(q0, dQ, qmax, i/2);
offset = q_level * DRED_LATENT_DIM;
dred_encode_latents(
&ec_encoder,
enc->latents_buffer + (i+latent_offset) * DRED_LATENT_DIM,
dred_latent_quant_scales_q8 + offset,
dred_latent_dead_zone_q8 + offset,
dred_latent_r_q8 + offset,
dred_latent_p0_q8 + offset,
DRED_LATENT_DIM,
arch
);
if (ec_tell(&ec_encoder) > 8*max_bytes) {
/* If we haven't been able to code one chunk, give up on DRED completely. */
if (i==0) return 0;
break;
}
/* Only commit the chunk when it or the previous one carries voice activity,
so trailing inactive chunks are dropped when the snapshot is restored. */
active = dred_voice_active(activity_mem, i+latent_offset);
if (active || prev_active) {
ec_bak = ec_encoder;
dred_encoded = i+2;
}
prev_active = active;
}
/* Avoid sending empty DRED packets. */
if (dred_encoded==0 || (dred_encoded<=2 && extra_dred_offset)) return 0;
/* Roll back to the last committed chunk and finalize with the byte count rounded up. */
ec_encoder = ec_bak;
ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8;
ec_enc_shrink(&ec_encoder, ec_buffer_fill);
ec_enc_done(&ec_encoder);
return ec_buffer_fill;
}

View File

@ -0,0 +1,71 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_ENCODER_H
#define DRED_ENCODER_H
#include "lpcnet.h"
#include "dred_config.h"
#include "dred_rdovae.h"
#include "entcode.h"
#include "lpcnet_private.h"
#include "dred_rdovae_enc.h"
#include "dred_rdovae_enc_data.h"
/* Filter order passed to filter_df2t() by the resampler. */
#define RESAMPLING_ORDER 8
/* DRED encoder instance. Members before DREDENC_RESET_START survive
   dred_encoder_reset() unless it re-initializes them explicitly; everything
   from DREDENC_RESET_START to the end of the struct is zeroed by it. */
typedef struct {
RDOVAEEnc model;
LPCNetEncState lpcnet_enc_state;
RDOVAEEncState rdovae_enc;
/* Non-zero once model weights have been loaded successfully. */
int loaded;
/* Input sampling rate and channel count given to dred_encoder_init(). */
opus_int32 Fs;
int channels;
#define DREDENC_RESET_START input_buffer
/* 16 kHz input samples waiting to be processed, and how many are valid. */
float input_buffer[2*DRED_DFRAME_SIZE];
int input_buffer_fill;
/* Offsets recomputed by each dred_compute_latents() call. */
int dred_offset;
int latent_offset;
/* Number of skipped entries remembered by dred_encode_silk_frame(). */
int last_extra_dred_offset;
/* History of latent vectors, newest first (entries are shifted towards higher indices). */
float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
int latents_buffer_fill;
/* History of state vectors, shifted in step with latents_buffer. */
float state_buffer[DRED_MAX_FRAMES * DRED_STATE_DIM];
/* Resampling filter memory; one extra slot because filter_df2t() reads mem[order]. */
float resample_mem[RESAMPLING_ORDER + 1];
} DREDEnc;
/* Loads encoder weights from a blob of len bytes; returns OPUS_OK or OPUS_BAD_ARG. */
int dred_encoder_load_model(DREDEnc* enc, const void *data, int len);
/* Stores Fs/channels, loads built-in weights unless USE_WEIGHTS_FILE is defined, then resets. */
void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels);
/* Clears the running state while keeping the model and configuration. */
void dred_encoder_reset(DREDEnc* enc);
/* NOTE(review): no definition of this function appears next to the other
   encoder functions in the visible sources -- confirm it exists elsewhere. */
void dred_deinit_encoder(DREDEnc *enc);
/* Feeds frame_size PCM samples to the encoder and updates the latent history. */
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch);
/* Writes one DRED payload into buf; returns the number of bytes used, or 0 if nothing is sent. */
int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch);
#endif

View File

@ -0,0 +1,42 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_H
#define DRED_RDOVAE_H
#include <stdlib.h>
#include "opus_types.h"
/* Forward declarations so headers can refer to the RDOVAE model and state
   types through pointers without pulling in their full definitions. */
/* Decoder and encoder model (weight) containers. */
typedef struct RDOVAEDec RDOVAEDec;
typedef struct RDOVAEEnc RDOVAEEnc;
/* Decoder and encoder running state. */
typedef struct RDOVAEDecStruct RDOVAEDecState;
typedef struct RDOVAEEncStruct RDOVAEEncState;
#endif

View File

@ -0,0 +1,33 @@
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
#ifndef DRED_RDOVAE_CONSTANTS_H
#define DRED_RDOVAE_CONSTANTS_H
#include "nnet.h"
/* Model dimensions shared by the DRED encoder and decoder. */
/* Features per frame, and sizes of one latent vector and one initial-state vector. */
#define DRED_NUM_FEATURES 20
#define DRED_LATENT_DIM 21
#define DRED_STATE_DIM 19
#define DRED_PADDED_LATENT_DIM 24
#define DRED_PADDED_STATE_DIM 24
#define DRED_NUM_QUANTIZATION_LEVELS 16
#define DRED_MAX_RNN_NEURONS 96
#define DRED_MAX_CONV_INPUTS 1536
#define DRED_ENC_MAX_RNN_NEURONS 1536
#define DRED_ENC_MAX_CONV_INPUTS 1536
#define DRED_DEC_MAX_RNN_NEURONS 96
#endif /* DRED_RDOVAE_CONSTANTS_H */

View File

@ -0,0 +1,139 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dred_rdovae_dec.h"
#include "dred_rdovae_constants.h"
#include "os_support.h"
/* If *init is zero, clear `dilation` consecutive segments of `len` floats
   starting at `mem`. In every case *init is set to 1 on return. */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    if (*init == 0) {
        int tap = 0;
        while (tap < dilation) {
            OPUS_CLEAR(mem + tap*len, len);
            tap++;
        }
    }
    *init = 1;
}
/* Decode nb_latents latent vectors into feature frames.

   A fresh decoder state is built from `state`, then each latent vector
   (DRED_LATENT_DIM floats) is decoded into 4*DRED_NUM_FEATURES floats written
   consecutively to `features`. */
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
{
    RDOVAEDecState dec;
    int chunk;

    memset(&dec, 0, sizeof(dec));
    dred_rdovae_dec_init_states(&dec, model, state, arch);
    for (chunk = 0; chunk < nb_latents; chunk++)
    {
        float *out = &features[4*chunk*DRED_NUM_FEATURES];
        const float *z = &latents[chunk*DRED_LATENT_DIM];
        dred_rdovae_decode_qframe(&dec, model, out, z, arch);
    }
}
/* Derive the five GRU states from the transmitted initial state.

   The initial state goes through two tanh dense layers; the second layer's
   output is the concatenation of the GRU1..GRU5 states, which is split into
   the per-layer arrays of `h`. The conv-state init flag is cleared. */
void dred_rdovae_dec_init_states(
    RDOVAEDecState *h,            /* io: state buffer handle */
    const RDOVAEDec *model,
    const float *initial_state,   /* i: initial state */
    int arch
    )
{
    float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
    float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
    float *src = state_init;

    compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
    compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);

    /* Walk through state_init, handing each GRU its slice. */
    OPUS_COPY(h->gru1_state, src, DEC_GRU1_STATE_SIZE);
    src += DEC_GRU1_STATE_SIZE;
    OPUS_COPY(h->gru2_state, src, DEC_GRU2_STATE_SIZE);
    src += DEC_GRU2_STATE_SIZE;
    OPUS_COPY(h->gru3_state, src, DEC_GRU3_STATE_SIZE);
    src += DEC_GRU3_STATE_SIZE;
    OPUS_COPY(h->gru4_state, src, DEC_GRU4_STATE_SIZE);
    src += DEC_GRU4_STATE_SIZE;
    OPUS_COPY(h->gru5_state, src, DEC_GRU5_STATE_SIZE);

    h->initialized = 0;
}
/* Decode one latent vector into a quadruple feature frame.

   The layers are run in sequence (dense1, then five GRU+GLU / conv pairs) and
   every layer's output is appended to `buffer`, so each layer sees the
   concatenation of all previous outputs. The final dense layer maps the whole
   buffer to `qframe`.

   On the first call after dred_rdovae_dec_init_states() (initialized == 0) the
   state of every conv layer is cleared. conv1_cond_init() sets the flag it is
   given after its first call, so each layer gets its own copy of the flag;
   sharing dec_state->initialized directly would clear only conv1_state. */
void dred_rdovae_decode_qframe(
    RDOVAEDecState *dec_state,  /* io: state buffer handle */
    const RDOVAEDec *model,
    float *qframe,              /* o: quadruple feature frame (four concatenated frames in reverse order) */
    const float *input,         /* i: latent vector */
    int arch
    )
{
    float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
                 + DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
    int output_index = 0;
    const int was_initialized = dec_state->initialized;
    int conv_init;

    /* run decoder stack and concatenate output in buffer */
    compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
    output_index += DEC_DENSE1_OUT_SIZE;

    compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
    compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
    output_index += DEC_GRU1_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv1_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV1_OUT_SIZE;

    compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
    compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
    output_index += DEC_GRU2_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv2_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV2_OUT_SIZE;

    compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
    compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
    output_index += DEC_GRU3_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv3_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV3_OUT_SIZE;

    compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
    compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
    output_index += DEC_GRU4_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv4_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV4_OUT_SIZE;

    compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
    compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
    output_index += DEC_GRU5_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv5_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV5_OUT_SIZE;

    /* All conv states have now been handled for this decoder instance. */
    dec_state->initialized = 1;

    compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
}

View File

@ -0,0 +1,53 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_DEC_H
#define DRED_RDOVAE_DEC_H
#include "dred_rdovae.h"
#include "dred_rdovae_dec_data.h"
#include "dred_rdovae_stats_data.h"
/* Running state of the RDOVAE decoder. */
struct RDOVAEDecStruct {
/* Cleared by dred_rdovae_dec_init_states(); set to 1 by conv1_cond_init()
   during the first dred_rdovae_decode_qframe() call. */
int initialized;
/* Recurrent state of each GRU layer, seeded by dred_rdovae_dec_init_states(). */
float gru1_state[DEC_GRU1_STATE_SIZE];
float gru2_state[DEC_GRU2_STATE_SIZE];
float gru3_state[DEC_GRU3_STATE_SIZE];
float gru4_state[DEC_GRU4_STATE_SIZE];
float gru5_state[DEC_GRU5_STATE_SIZE];
/* Memory of each conv layer, passed to compute_generic_conv1d(). */
float conv1_state[DEC_CONV1_STATE_SIZE];
float conv2_state[DEC_CONV2_STATE_SIZE];
float conv3_state[DEC_CONV3_STATE_SIZE];
float conv4_state[DEC_CONV4_STATE_SIZE];
float conv5_state[DEC_CONV5_STATE_SIZE];
};
/* Seeds the GRU states of h from initial_state and clears h->initialized. */
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
/* Decodes one latent vector z into one quadruple feature frame. */
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
/* Decodes nb_latents latent vectors into features using a fresh decoder state built from state. */
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,124 @@
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
#ifndef DRED_RDOVAE_DEC_DATA_H
#define DRED_RDOVAE_DEC_DATA_H
#include "nnet.h"
#include "opus_types.h"
#include "dred_rdovae.h"
#include "dred_rdovae_constants.h"
/* Layer dimensions of the RDOVAE decoder. */
/* Output sizes of the dense and GLU layers. */
#define DEC_DENSE1_OUT_SIZE 96
#define DEC_GLU1_OUT_SIZE 96
#define DEC_GLU2_OUT_SIZE 96
#define DEC_GLU3_OUT_SIZE 96
#define DEC_GLU4_OUT_SIZE 96
#define DEC_GLU5_OUT_SIZE 96
#define DEC_OUTPUT_OUT_SIZE 80
#define DEC_HIDDEN_INIT_OUT_SIZE 128
/* 480 = 5 * 96, i.e. the five GRU states concatenated. */
#define DEC_GRU_INIT_OUT_SIZE 480
/* Output and state sizes of the GRU layers. */
#define DEC_GRU1_OUT_SIZE 96
#define DEC_GRU1_STATE_SIZE 96
#define DEC_GRU2_OUT_SIZE 96
#define DEC_GRU2_STATE_SIZE 96
#define DEC_GRU3_OUT_SIZE 96
#define DEC_GRU3_STATE_SIZE 96
#define DEC_GRU4_OUT_SIZE 96
#define DEC_GRU4_STATE_SIZE 96
#define DEC_GRU5_OUT_SIZE 96
#define DEC_GRU5_STATE_SIZE 96
/* Conv layers: the input size grows by 128 (one GRU output plus one conv
   output) from one layer to the next. */
#define DEC_CONV1_OUT_SIZE 32
#define DEC_CONV1_IN_SIZE 192
#define DEC_CONV1_STATE_SIZE (192 * (1))
#define DEC_CONV1_DELAY 0
#define DEC_CONV2_OUT_SIZE 32
#define DEC_CONV2_IN_SIZE 320
#define DEC_CONV2_STATE_SIZE (320 * (1))
#define DEC_CONV2_DELAY 0
#define DEC_CONV3_OUT_SIZE 32
#define DEC_CONV3_IN_SIZE 448
#define DEC_CONV3_STATE_SIZE (448 * (1))
#define DEC_CONV3_DELAY 0
#define DEC_CONV4_OUT_SIZE 32
#define DEC_CONV4_IN_SIZE 576
#define DEC_CONV4_STATE_SIZE (576 * (1))
#define DEC_CONV4_DELAY 0
#define DEC_CONV5_OUT_SIZE 32
#define DEC_CONV5_IN_SIZE 704
#define DEC_CONV5_STATE_SIZE (704 * (1))
#define DEC_CONV5_DELAY 0
/* Weights of the RDOVAE decoder, one LinearLayer per network layer. */
struct RDOVAEDec {
LinearLayer dec_dense1;
LinearLayer dec_glu1;
LinearLayer dec_glu2;
LinearLayer dec_glu3;
LinearLayer dec_glu4;
LinearLayer dec_glu5;
LinearLayer dec_output;
/* Used by dred_rdovae_dec_init_states() to derive the GRU states. */
LinearLayer dec_hidden_init;
LinearLayer dec_gru_init;
/* Each GRU has separate input and recurrent weights. */
LinearLayer dec_gru1_input;
LinearLayer dec_gru1_recurrent;
LinearLayer dec_gru2_input;
LinearLayer dec_gru2_recurrent;
LinearLayer dec_gru3_input;
LinearLayer dec_gru3_recurrent;
LinearLayer dec_gru4_input;
LinearLayer dec_gru4_recurrent;
LinearLayer dec_gru5_input;
LinearLayer dec_gru5_recurrent;
LinearLayer dec_conv1;
LinearLayer dec_conv2;
LinearLayer dec_conv3;
LinearLayer dec_conv4;
LinearLayer dec_conv5;
};
int init_rdovaedec(RDOVAEDec *model, const WeightArray *arrays);
#endif /* DRED_RDOVAE_DEC_DATA_H */

View File

@ -0,0 +1,110 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dred_rdovae_enc.h"
#include "os_support.h"
#include "dred_rdovae_constants.h"
/* One-shot reset of a conv1d history buffer.
   While *init is still zero, zeroes `dilation` consecutive segments of `len`
   floats starting at mem. *init is forced to 1 on every call, so any later
   call made with the same flag leaves mem untouched. */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    const int already_initialized = *init;
    *init = 1;
    if (!already_initialized) {
        int seg;
        for (seg = 0; seg < dilation; seg++) {
            OPUS_CLEAR(mem + seg*len, len);
        }
    }
}
/* Run the RDOVAE encoder on one double feature frame.
   The layer outputs (dense1, then alternating GRU / conv stages) are
   concatenated into `buffer`; every stage reads the buffer from its start, so
   each one sees the outputs of all previous stages. The final buffer feeds
   enc_zdense (latents) and gdense1 -> gdense2 (initial state).

   enc_state     io: recurrent/conv state, updated in place
   model         i:  encoder layers
   latents       o:  first DRED_LATENT_DIM values of the enc_zdense output
   initial_state o:  first DRED_STATE_DIM values of the gdense2 output
   input         i:  double feature frame (concatenated)
   arch          i:  passed through to the compute_generic_* kernels */
void dred_rdovae_encode_dframe(
    RDOVAEEncState *enc_state,           /* io: encoder state */
    const RDOVAEEnc *model,
    float *latents,                      /* o: latent vector */
    float *initial_state,                /* o: initial state */
    const float *input,                  /* i: double feature frame (concatenated) */
    int arch
    )
{
    float padded_latents[DRED_PADDED_LATENT_DIM];
    float padded_state[DRED_PADDED_STATE_DIM];
    float buffer[ENC_DENSE1_OUT_SIZE + ENC_GRU1_OUT_SIZE + ENC_GRU2_OUT_SIZE + ENC_GRU3_OUT_SIZE + ENC_GRU4_OUT_SIZE + ENC_GRU5_OUT_SIZE
               + ENC_CONV1_OUT_SIZE + ENC_CONV2_OUT_SIZE + ENC_CONV3_OUT_SIZE + ENC_CONV4_OUT_SIZE + ENC_CONV5_OUT_SIZE];
    float state_hidden[GDENSE1_OUT_SIZE];
    int output_index = 0;
    /* conv1_cond_init() sets the flag it is given to 1 on every call. Handing
       it &enc_state->initialized directly would therefore make only the first
       conv layer get its history cleared. Each call below gets its own copy of
       the flag as it was on entry; the persistent flag is latched once all
       five layers have been handled. */
    const int was_initialized = enc_state->initialized;
    int conv_init;

    /* run encoder stack and concatenate output in buffer*/
    compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
    output_index += ENC_DENSE1_OUT_SIZE;

    compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
    output_index += ENC_GRU1_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv1_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += ENC_CONV1_OUT_SIZE;

    compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
    output_index += ENC_GRU2_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv2_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV2_OUT_SIZE;

    compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
    output_index += ENC_GRU3_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv3_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV3_OUT_SIZE;

    compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
    output_index += ENC_GRU4_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv4_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV4_OUT_SIZE;

    compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
    output_index += ENC_GRU5_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv5_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV5_OUT_SIZE;

    /* All conv histories have had their first-call treatment. */
    enc_state->initialized = 1;

    compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
    OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);

    /* next, calculate initial state */
    compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
    compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
    OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
}

View File

@ -0,0 +1,52 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_ENC_H
#define DRED_RDOVAE_ENC_H
#include "dred_rdovae.h"
#include "dred_rdovae_enc_data.h"
/* Persistent state of the RDOVAE encoder, carried from one call of
   dred_rdovae_encode_dframe() to the next. */
struct RDOVAEEncStruct {
/* Zero until the first encode call; dred_rdovae_encode_dframe() hands this
   flag to conv1_cond_init(), which sets it to 1. */
int initialized;
/* Hidden state of GRU stages 1-5, updated in place by compute_generic_gru(). */
float gru1_state[ENC_GRU1_STATE_SIZE];
float gru2_state[ENC_GRU2_STATE_SIZE];
float gru3_state[ENC_GRU3_STATE_SIZE];
float gru4_state[ENC_GRU4_STATE_SIZE];
float gru5_state[ENC_GRU5_STATE_SIZE];
/* Input history of conv stages 1-5. conv1 is run with dilation 1; conv2..5
   are run with dilation 2 by the encoder, hence the doubled buffers. */
float conv1_state[ENC_CONV1_STATE_SIZE];
float conv2_state[2*ENC_CONV2_STATE_SIZE];
float conv3_state[2*ENC_CONV3_STATE_SIZE];
float conv4_state[2*ENC_CONV4_STATE_SIZE];
float conv5_state[2*ENC_CONV5_STATE_SIZE];
};
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,109 @@
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
#ifndef DRED_RDOVAE_ENC_DATA_H
#define DRED_RDOVAE_ENC_DATA_H
#include "nnet.h"
#include "opus_types.h"
#include "dred_rdovae.h"
#include "dred_rdovae_constants.h"
#define ENC_DENSE1_OUT_SIZE 64
#define ENC_ZDENSE_OUT_SIZE 24
#define GDENSE1_OUT_SIZE 128
#define GDENSE2_OUT_SIZE 24
#define ENC_GRU1_OUT_SIZE 64
#define ENC_GRU1_STATE_SIZE 64
#define ENC_GRU2_OUT_SIZE 64
#define ENC_GRU2_STATE_SIZE 64
#define ENC_GRU3_OUT_SIZE 64
#define ENC_GRU3_STATE_SIZE 64
#define ENC_GRU4_OUT_SIZE 64
#define ENC_GRU4_STATE_SIZE 64
#define ENC_GRU5_OUT_SIZE 64
#define ENC_GRU5_STATE_SIZE 64
#define ENC_CONV1_OUT_SIZE 96
#define ENC_CONV1_IN_SIZE 128
#define ENC_CONV1_STATE_SIZE (128 * (1))
#define ENC_CONV1_DELAY 0
#define ENC_CONV2_OUT_SIZE 96
#define ENC_CONV2_IN_SIZE 288
#define ENC_CONV2_STATE_SIZE (288 * (1))
#define ENC_CONV2_DELAY 0
#define ENC_CONV3_OUT_SIZE 96
#define ENC_CONV3_IN_SIZE 448
#define ENC_CONV3_STATE_SIZE (448 * (1))
#define ENC_CONV3_DELAY 0
#define ENC_CONV4_OUT_SIZE 96
#define ENC_CONV4_IN_SIZE 608
#define ENC_CONV4_STATE_SIZE (608 * (1))
#define ENC_CONV4_DELAY 0
#define ENC_CONV5_OUT_SIZE 96
#define ENC_CONV5_IN_SIZE 768
#define ENC_CONV5_STATE_SIZE (768 * (1))
#define ENC_CONV5_DELAY 0
/* Layer table for the RDOVAE encoder (this header is auto generated from a
   checkpoint, so hand edits are lost on regeneration). One LinearLayer per
   network stage; init_rdovaeenc(), declared right after this struct, receives
   a pointer to it together with a WeightArray list. */
struct RDOVAEEnc {
/* Dense stages: enc_dense1 is the first layer, enc_zdense produces the
   latents, gdense1 -> gdense2 produce the initial state (see
   dred_rdovae_encode_dframe()). */
LinearLayer enc_dense1;
LinearLayer enc_zdense;
LinearLayer gdense1;
LinearLayer gdense2;
/* GRU stages 1-5: each pair is passed together to compute_generic_gru(). */
LinearLayer enc_gru1_input;
LinearLayer enc_gru1_recurrent;
LinearLayer enc_gru2_input;
LinearLayer enc_gru2_recurrent;
LinearLayer enc_gru3_input;
LinearLayer enc_gru3_recurrent;
LinearLayer enc_gru4_input;
LinearLayer enc_gru4_recurrent;
LinearLayer enc_gru5_input;
LinearLayer enc_gru5_recurrent;
/* Conv stages 1-5 (sizes given by the ENC_CONVn_* macros above). */
LinearLayer enc_conv1;
LinearLayer enc_conv2;
LinearLayer enc_conv3;
LinearLayer enc_conv4;
LinearLayer enc_conv5;
};
int init_rdovaeenc(RDOVAEEnc *model, const WeightArray *arrays);
#endif /* DRED_RDOVAE_ENC_DATA_H */

View File

@ -0,0 +1,353 @@
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dred_rdovae_stats_data.h"
const opus_uint8 dred_latent_quant_scales_q8[336] = {
255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 208, 219, 218,
217, 216, 219, 218, 217, 222, 217, 219,
214, 215, 218, 219, 219, 217, 217, 213,
215, 214, 168, 187, 187, 186, 183, 189,
187, 184, 192, 184, 189, 180, 181, 187,
188, 189, 185, 184, 178, 181, 179, 134,
160, 160, 159, 155, 163, 160, 157, 167,
157, 163, 151, 152, 160, 162, 164, 158,
157, 149, 152, 149, 106, 137, 138, 137,
131, 141, 138, 133, 146, 135, 141, 127,
129, 138, 139, 143, 136, 135, 124, 127,
125, 82, 117, 118, 118, 111, 122, 119,
113, 128, 115, 122, 108, 109, 119, 120,
124, 117, 115, 104, 105, 104, 64, 101,
102, 102, 95, 107, 103, 96, 114, 99,
107, 91, 93, 102, 103, 108, 101, 99,
87, 86, 87, 48, 81, 84, 87, 79,
90, 87, 78, 87, 84, 90, 76, 78,
87, 80, 69, 86, 84, 72, 58, 72,
36, 70, 71, 76, 67, 87, 72, 67,
78, 73, 74, 65, 67, 75, 69, 20,
76, 74, 60, 46, 61, 26, 50, 63,
66, 57, 31, 45, 53, 63, 65, 40,
56, 57, 56, 34, 5, 67, 63, 50,
21, 51, 17, 23, 31, 53, 48, 11,
18, 34, 40, 56, 15, 47, 49, 34,
12, 0, 58, 54, 42, 10, 43, 10,
6, 7, 25, 42, 3, 6, 17, 9,
48, 5, 41, 43, 19, 3, 1, 47,
47, 35, 2, 36, 3, 6, 7, 11,
35, 3, 5, 6, 8, 18, 4, 35,
38, 7, 3, 1, 15, 16, 29, 0,
31, 3, 5, 5, 5, 29, 2, 3,
5, 6, 11, 3, 31, 33, 4, 2,
1, 11, 10, 25, 0, 27, 2, 3,
3, 2, 24, 1, 2, 4, 4, 6,
2, 27, 29, 2, 1, 1, 7, 7,
21, 0, 23, 2, 2, 2, 1, 21,
1, 2, 3, 3, 2, 1, 24, 27,
2, 1, 0, 6, 5, 18, 0, 20
};
const opus_uint8 dred_latent_dead_zone_q8[336] = {
1, 0, 10, 0, 0, 6, 11, 0,
0, 0, 7, 0, 0, 0, 13, 0,
4, 0, 0, 5, 0, 1, 0, 13,
1, 0, 12, 15, 0, 8, 0, 12,
0, 0, 3, 18, 0, 7, 0, 0,
12, 0, 0, 7, 16, 5, 0, 17,
18, 0, 25, 2, 17, 0, 1, 7,
22, 4, 11, 3, 0, 18, 0, 0,
17, 20, 9, 1, 24, 22, 5, 43,
6, 22, 1, 2, 11, 28, 13, 14,
7, 0, 26, 0, 0, 29, 24, 14,
4, 31, 27, 11, 66, 11, 28, 2,
4, 16, 34, 23, 19, 11, 0, 34,
0, 1, 45, 29, 20, 6, 41, 33,
17, 94, 16, 36, 4, 6, 21, 43,
37, 24, 16, 0, 43, 0, 1, 70,
35, 26, 9, 56, 41, 27, 133, 23,
47, 5, 9, 28, 56, 56, 30, 21,
0, 56, 1, 2, 107, 41, 37, 11,
85, 48, 46, 168, 31, 59, 7, 11,
39, 72, 255, 39, 28, 0, 84, 2,
3, 160, 53, 51, 16, 255, 53, 75,
231, 44, 81, 9, 14, 54, 255, 255,
49, 38, 0, 255, 3, 12, 255, 255,
81, 24, 255, 255, 124, 255, 71, 255,
12, 20, 67, 255, 255, 70, 54, 0,
255, 5, 27, 255, 255, 124, 37, 255,
255, 220, 255, 104, 255, 15, 28, 255,
255, 255, 96, 73, 2, 255, 8, 44,
255, 255, 255, 53, 255, 255, 255, 255,
158, 255, 19, 37, 255, 255, 255, 123,
108, 3, 255, 11, 178, 255, 255, 255,
87, 255, 255, 255, 255, 255, 255, 23,
57, 255, 255, 255, 255, 255, 5, 255,
14, 255, 255, 255, 255, 108, 255, 255,
255, 255, 255, 255, 27, 65, 255, 255,
255, 255, 255, 7, 255, 16, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255,
255, 30, 75, 255, 255, 255, 255, 255,
9, 255, 18, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 38, 96,
255, 255, 255, 255, 255, 11, 255, 21
};
const opus_uint8 dred_latent_r_q8[336] = {
233, 94, 91, 112, 149, 65, 92, 118,
55, 122, 82, 190, 175, 100, 80, 62,
125, 130, 236, 90, 219, 228, 85, 75,
96, 138, 50, 75, 107, 47, 107, 67,
181, 165, 85, 64, 47, 109, 114, 233,
72, 213, 222, 72, 58, 81, 125, 36,
59, 97, 36, 92, 53, 171, 154, 70,
49, 33, 92, 98, 229, 54, 207, 214,
59, 43, 65, 109, 24, 43, 74, 27,
76, 40, 160, 143, 56, 35, 21, 75,
82, 224, 37, 199, 204, 45, 29, 51,
93, 14, 29, 60, 19, 60, 29, 149,
128, 42, 23, 12, 59, 66, 219, 24,
190, 191, 32, 17, 38, 77, 8, 18,
48, 13, 46, 20, 135, 113, 31, 14,
6, 43, 50, 213, 15, 181, 176, 21,
9, 26, 61, 4, 10, 38, 8, 34,
14, 120, 98, 21, 7, 3, 30, 37,
206, 9, 172, 155, 10, 4, 16, 45,
2, 5, 29, 3, 22, 8, 101, 81,
12, 2, 0, 18, 24, 198, 3, 160,
135, 4, 2, 10, 32, 0, 2, 17,
2, 15, 4, 85, 67, 6, 0, 0,
10, 15, 189, 0, 148, 106, 0, 0,
4, 21, 0, 0, 6, 0, 9, 0,
68, 53, 1, 0, 0, 5, 7, 180,
0, 133, 66, 0, 0, 1, 12, 0,
0, 0, 0, 4, 0, 52, 41, 0,
0, 0, 1, 2, 169, 0, 118, 32,
0, 0, 0, 7, 0, 0, 0, 0,
2, 0, 38, 31, 0, 0, 0, 1,
1, 158, 0, 103, 0, 0, 0, 0,
3, 0, 0, 0, 0, 0, 0, 26,
23, 0, 0, 0, 0, 0, 146, 0,
88, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 17, 15, 0, 0,
0, 0, 0, 132, 0, 74, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 10, 9, 0, 0, 0, 0, 0,
118, 0, 62, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 6, 5,
0, 0, 0, 0, 0, 104, 0, 51
};
const opus_uint8 dred_latent_p0_q8[336] = {
12, 162, 137, 134, 107, 138, 118, 138,
201, 114, 145, 66, 81, 143, 146, 159,
122, 121, 20, 104, 37, 14, 171, 147,
142, 118, 152, 130, 149, 209, 123, 157,
75, 91, 153, 157, 168, 130, 128, 23,
120, 43, 18, 184, 158, 152, 126, 167,
144, 159, 220, 133, 169, 85, 102, 163,
170, 179, 140, 137, 27, 139, 49, 22,
197, 171, 163, 135, 183, 158, 167, 229,
145, 182, 96, 113, 175, 183, 193, 150,
147, 32, 159, 57, 27, 211, 184, 175,
144, 199, 174, 180, 237, 158, 196, 107,
122, 187, 198, 208, 161, 159, 37, 182,
66, 35, 224, 198, 188, 155, 215, 190,
194, 243, 172, 209, 115, 131, 199, 213,
222, 174, 170, 43, 205, 75, 44, 235,
212, 201, 166, 231, 206, 208, 248, 186,
223, 122, 140, 211, 228, 237, 187, 183,
50, 227, 84, 57, 246, 228, 216, 179,
246, 223, 227, 253, 204, 237, 132, 153,
226, 245, 255, 203, 198, 58, 251, 96,
78, 252, 241, 228, 192, 255, 237, 239,
254, 218, 248, 140, 164, 237, 255, 255,
216, 212, 67, 255, 106, 106, 255, 255,
242, 207, 255, 255, 250, 255, 234, 255,
151, 177, 249, 255, 255, 232, 228, 76,
255, 115, 152, 255, 255, 253, 223, 255,
255, 255, 255, 246, 255, 163, 192, 255,
255, 255, 245, 241, 87, 255, 126, 201,
255, 255, 255, 235, 255, 255, 255, 255,
253, 255, 175, 205, 255, 255, 255, 253,
250, 98, 255, 137, 255, 255, 255, 255,
248, 255, 255, 255, 255, 255, 255, 189,
220, 255, 255, 255, 255, 255, 108, 255,
148, 255, 255, 255, 255, 253, 255, 255,
255, 255, 255, 255, 201, 230, 255, 255,
255, 255, 255, 116, 255, 159, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255,
255, 213, 238, 255, 255, 255, 255, 255,
125, 255, 169, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 224, 244,
255, 255, 255, 255, 255, 134, 255, 180
};
const opus_uint8 dred_state_quant_scales_q8[304] = {
255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 121, 255, 255, 255, 255, 255,
255, 255, 71, 215, 215, 233, 215, 216,
215, 216, 216, 215, 216, 114, 215, 217,
227, 216, 216, 216, 216, 65, 181, 181,
205, 181, 182, 181, 182, 183, 181, 183,
102, 182, 184, 196, 182, 184, 184, 183,
60, 153, 153, 175, 152, 154, 152, 155,
155, 152, 155, 84, 153, 155, 166, 154,
156, 156, 155, 54, 129, 128, 146, 127,
131, 128, 131, 132, 128, 131, 61, 129,
130, 140, 130, 133, 134, 131, 49, 109,
108, 120, 107, 111, 108, 111, 113, 108,
112, 43, 108, 110, 118, 110, 114, 115,
110, 45, 93, 91, 97, 89, 95, 91,
95, 98, 91, 96, 31, 91, 92, 98,
93, 98, 100, 93, 42, 78, 77, 77,
74, 81, 76, 81, 87, 77, 82, 1,
77, 77, 82, 80, 87, 91, 78, 45,
67, 65, 62, 62, 70, 64, 71, 79,
65, 71, 0, 65, 64, 69, 69, 77,
82, 66, 49, 58, 55, 49, 53, 63,
55, 65, 79, 56, 62, 2, 55, 54,
57, 60, 72, 77, 57, 92, 51, 47,
40, 44, 57, 46, 60, 78, 47, 54,
131, 47, 45, 48, 53, 66, 67, 49,
189, 45, 41, 33, 37, 51, 39, 53,
69, 41, 47, 188, 40, 38, 40, 47,
59, 59, 42, 235, 40, 36, 27, 32,
47, 34, 47, 62, 36, 41, 255, 34,
32, 34, 42, 52, 52, 37, 255, 35,
31, 23, 28, 41, 29, 41, 53, 31,
37, 216, 28, 27, 29, 37, 46, 46,
32, 213, 31, 27, 19, 24, 36, 25,
36, 46, 27, 34, 181, 24, 23, 24,
32, 40, 40, 28, 177, 27, 24, 15,
21, 31, 21, 32, 40, 24, 42, 151,
20, 19, 20, 28, 36, 36, 25, 146
};
const opus_uint8 dred_state_dead_zone_q8[304] = {
13, 16, 9, 6, 20, 10, 13, 35,
13, 15, 255, 9, 11, 10, 23, 14,
26, 43, 255, 12, 14, 8, 8, 18,
8, 13, 30, 11, 15, 255, 7, 9,
8, 19, 14, 24, 38, 255, 11, 12,
7, 8, 17, 7, 13, 25, 9, 15,
255, 6, 6, 6, 17, 14, 21, 32,
255, 11, 11, 6, 9, 16, 5, 13,
22, 8, 15, 255, 5, 5, 5, 14,
15, 19, 27, 255, 11, 9, 6, 9,
15, 4, 13, 19, 6, 15, 255, 4,
3, 4, 12, 16, 17, 22, 255, 11,
8, 4, 9, 15, 4, 13, 17, 5,
16, 255, 3, 2, 3, 11, 17, 16,
18, 255, 11, 7, 3, 10, 15, 3,
14, 16, 4, 17, 255, 2, 2, 2,
9, 18, 12, 14, 255, 11, 4, 3,
8, 14, 3, 12, 18, 3, 17, 255,
1, 2, 2, 8, 20, 7, 7, 255,
11, 4, 2, 8, 13, 2, 12, 15,
2, 18, 255, 1, 2, 1, 8, 18,
0, 1, 255, 13, 4, 3, 11, 14,
3, 11, 22, 3, 16, 2, 0, 3,
2, 11, 0, 11, 0, 121, 12, 4,
2, 11, 13, 4, 2, 0, 3, 20,
0, 0, 4, 2, 9, 8, 14, 0,
29, 9, 5, 0, 10, 9, 6, 1,
1, 4, 26, 0, 1, 4, 2, 4,
13, 14, 0, 4, 7, 7, 3, 15,
9, 8, 10, 0, 9, 34, 0, 2,
3, 4, 4, 14, 17, 0, 1, 13,
5, 2, 22, 14, 9, 17, 4, 6,
46, 0, 2, 3, 3, 7, 23, 24,
0, 0, 19, 3, 4, 28, 21, 10,
24, 7, 2, 75, 0, 3, 3, 4,
9, 33, 32, 0, 1, 26, 7, 4,
37, 30, 10, 34, 12, 5, 255, 0,
3, 2, 1, 13, 50, 46, 0, 4
};
const opus_uint8 dred_state_r_q8[304] = {
207, 224, 253, 207, 197, 233, 190, 198,
232, 180, 4, 245, 251, 254, 210, 173,
169, 223, 22, 199, 218, 253, 199, 187,
229, 181, 189, 227, 168, 3, 243, 251,
253, 203, 162, 156, 218, 17, 190, 212,
252, 190, 177, 224, 170, 178, 223, 156,
1, 240, 250, 253, 194, 149, 142, 212,
12, 181, 205, 252, 180, 165, 218, 158,
167, 217, 143, 0, 237, 249, 253, 185,
135, 128, 205, 7, 169, 196, 251, 169,
152, 212, 144, 154, 210, 128, 0, 234,
247, 252, 174, 120, 112, 197, 4, 158,
187, 250, 157, 139, 205, 130, 141, 203,
112, 0, 230, 246, 251, 162, 105, 98,
188, 2, 145, 177, 249, 144, 124, 197,
115, 127, 194, 97, 0, 226, 244, 250,
149, 91, 85, 179, 1, 130, 165, 247,
128, 109, 187, 100, 115, 183, 79, 0,
220, 241, 249, 136, 78, 77, 166, 2,
116, 152, 245, 113, 95, 177, 86, 106,
173, 64, 0, 214, 239, 248, 122, 67,
71, 155, 11, 103, 139, 242, 99, 84,
166, 78, 107, 161, 50, 0, 208, 235,
246, 109, 63, 61, 143, 90, 90, 126,
239, 82, 74, 154, 70, 107, 149, 37,
19, 200, 232, 244, 98, 53, 48, 131,
166, 77, 112, 235, 68, 64, 140, 60,
96, 136, 25, 104, 191, 227, 242, 88,
43, 37, 120, 183, 66, 101, 231, 56,
56, 127, 48, 86, 124, 17, 132, 182,
222, 239, 78, 32, 28, 110, 188, 52,
87, 226, 46, 42, 112, 36, 71, 111,
10, 117, 171, 216, 236, 64, 22, 18,
99, 178, 39, 74, 220, 37, 30, 97,
25, 57, 99, 7, 100, 160, 210, 233,
51, 15, 10, 89, 164, 27, 60, 213,
30, 19, 81, 16, 43, 84, 5, 83,
147, 202, 229, 38, 9, 5, 79, 150
};
const opus_uint8 dred_state_p0_q8[304] = {
40, 24, 1, 35, 45, 15, 47, 44,
15, 58, 252, 7, 2, 1, 25, 56,
53, 17, 230, 45, 28, 2, 41, 50,
18, 54, 49, 17, 65, 253, 8, 3,
1, 28, 64, 61, 21, 235, 52, 32,
2, 48, 56, 21, 62, 54, 20, 73,
255, 9, 3, 1, 33, 73, 69, 25,
240, 59, 37, 2, 56, 64, 24, 70,
60, 23, 82, 255, 11, 4, 2, 39,
83, 78, 31, 246, 67, 43, 2, 65,
72, 29, 79, 67, 27, 92, 255, 13,
4, 2, 45, 93, 88, 39, 250, 75,
49, 3, 75, 81, 33, 89, 74, 31,
102, 255, 15, 5, 2, 52, 105, 98,
45, 252, 84, 56, 4, 85, 90, 39,
99, 82, 35, 112, 255, 18, 6, 3,
60, 116, 109, 52, 254, 95, 63, 5,
97, 101, 45, 110, 90, 40, 125, 255,
21, 7, 4, 70, 128, 116, 58, 251,
105, 72, 6, 109, 110, 52, 119, 95,
46, 136, 255, 24, 9, 4, 79, 135,
121, 65, 235, 115, 80, 7, 124, 118,
60, 126, 97, 53, 146, 255, 29, 11,
5, 89, 131, 131, 74, 129, 124, 90,
9, 139, 125, 69, 127, 91, 61, 160,
189, 33, 13, 6, 98, 142, 145, 84,
50, 132, 100, 11, 153, 132, 78, 136,
99, 70, 176, 93, 39, 15, 7, 106,
155, 159, 94, 39, 139, 110, 13, 168,
139, 88, 150, 107, 78, 193, 72, 46,
17, 8, 114, 168, 172, 105, 36, 153,
119, 16, 183, 155, 98, 167, 121, 88,
209, 83, 53, 21, 10, 128, 185, 188,
116, 43, 167, 128, 19, 197, 171, 108,
183, 135, 96, 226, 96, 60, 24, 12,
142, 201, 204, 128, 51, 182, 142, 23,
210, 188, 119, 200, 151, 109, 251, 110,
69, 29, 14, 157, 218, 220, 139, 60
};

View File

@ -0,0 +1,27 @@
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
#ifndef DRED_RDOVAE_STATS_DATA_H
#define DRED_RDOVAE_STATS_DATA_H
#include "nnet.h"
#include "opus_types.h"
#include "dred_rdovae_constants.h"
extern const opus_uint8 dred_latent_quant_scales_q8[336];
extern const opus_uint8 dred_latent_dead_zone_q8[336];
extern const opus_uint8 dred_latent_r_q8[336];
extern const opus_uint8 dred_latent_p0_q8[336];
extern const opus_uint8 dred_state_quant_scales_q8[304];
extern const opus_uint8 dred_state_dead_zone_q8[304];
extern const opus_uint8 dred_state_r_q8[304];
extern const opus_uint8 dred_state_p0_q8[304];
#endif /* DRED_RDOVAE_STATS_DATA_H */

View File

@ -0,0 +1,280 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include "kiss_fft.h"
#include "common.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include <assert.h>
#include "lpcnet.h"
#include "lpcnet_private.h"
#include "os_support.h"
#include "cpu_support.h"
/* Second-order IIR section with a two-element state.
   For each sample: out = in + mem[0], then
     mem[0] = mem[1] + (b[0]*in - a[0]*out)
     mem[1] =           b[1]*in - a[1]*out
   The state update is evaluated in double precision and stored back as float.
   y may be the same buffer as x: each input sample is read before the
   corresponding output sample is written. */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n;
  for (n = 0; n < N; n++) {
    const float in = x[n];
    const float out = in + mem[0];
    const double in_d = in;
    const double out_d = out;
    mem[0] = mem[1] + (b[0]*in_d - a[0]*out_d);
    mem[1] = (b[1]*in_d - a[1]*out_d);
    y[n] = out;
  }
}
/* Draws one value from rand() and maps it to the range [-0.5, 0.5]. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return unit-.5;
}
/* Fills a[0..1] and b[0..1] with random coefficients, each .75*uni_rand().
   Values are drawn in the order a[0], a[1], b[0], b[1]. */
static void rand_resp(float *a, float *b) {
  int k;
  for (k = 0; k < 2; k++) a[k] = .75*uni_rand();
  for (k = 0; k < 2; k++) b[k] = .75*uni_rand();
}
/* Fills noise[0..FRAME_SIZE-1] with integer noise samples.
   Each sample is the difference of log_approx() applied to two rand() draws
   scaled to [0, 1], multiplied by noise_std*.707 and rounded to the nearest
   integer. (The .707 factor is presumably 1/sqrt(2), so that noise_std is the
   standard deviation of the resulting Laplacian-like noise -- confirm.) */
void compute_noise(int *noise, float noise_std) {
  const double scale = noise_std*.707;
  int *out = noise;
  int remaining;
  for (remaining = FRAME_SIZE; remaining > 0; remaining--) {
    *out++ = (int)floor(.5 + scale*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
  }
}
static opus_int16 float2short(float x)
{
int i;
i = (int)floor(.5+x);
return IMAX(-32767, IMIN(32767, i));
}
/* Writes one frame of interleaved 16-bit training audio to `file`.
   For every sample the output holds a pair:
     data[2*i]   "signal in":  st->sig_mem[0], the previously synthesized sample
     data[2*i+1] "signal out": the clean pcm[i]
   The synthesized signal is produced by predicting the sample from
   st->sig_mem with the coefficients stored at st->features[NB_BANDS+2..],
   mu-law coding the prediction residual, adding noise[i] to the mu-law index
   (clamped to 0..255) and decoding it again.

   st     io: encoder state; sig_mem is shifted and updated for every sample
   pcm    i:  FRAME_SIZE target samples
   noise  i:  FRAME_SIZE integer offsets applied to the mu-law excitation
   file   i:  destination stream; the program exits if the frame cannot be
              written completely */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  int i;
  opus_int16 data[2*FRAME_SIZE];
  for (i=0;i<FRAME_SIZE;i++) {
    float p=0;
    float e;
    int j;
    for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j];
    e = lin2ulaw(pcm[i] - p);
    /* Signal in. */
    data[2*i] = float2short(st->sig_mem[0]);
    /* Signal out. */
    data[2*i+1] = pcm[i];
    /* Simulate error on excitation. */
    e += noise[i];
    e = IMIN(255, IMAX(0, e));
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = p + ulaw2lin(e);
  }
  /* sizeof(data) replaces the former hard-coded 4*FRAME_SIZE byte count. */
  if (fwrite(data, sizeof(data), 1, file) != 1) {
    fprintf(stderr, "error writing PCM output\n");
    exit(1);
  }
}
/* Feature/training-data dumper.
   Modes (selected by argv[1]):
     -train  <speech> <features out> <pcm out>   features + noisy PCM pairs
     -test   <speech> <features out>             features only, single pass
     -btrain <speech> <features out> <pcm out>   as -train, plus Burg cepstra
     -btest  <speech> <features out>             as -test, plus Burg cepstra
     -ptrain <noise> <speech> <features out>     pitch features, noise mixed in
     -ptest  <speech> <features out>             pitch features, single pass
   The speech input is read as raw 16-bit PCM, one FRAME_SIZE frame at a time.
   In training modes the input is looped and the gain, the spectral response
   and (for -ptrain) the noise position/gain are re-randomized periodically.
   Returns 0 on success; returns or exits with 1 on usage or I/O errors. */
int main(int argc, char **argv) {
  int i;
  char *argv0;
  int count=0;
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
  float mem_hp_x[2]={0};
  float mem_resp_x[2]={0};
  float mem_preemph=0;
  float x[FRAME_SIZE];
  int gain_change_count=0;
  FILE *f1;
  FILE *ffeat;
  FILE *fpcm=NULL;
  opus_int16 pcm[FRAME_SIZE]={0};
  int noisebuf[FRAME_SIZE]={0};
  opus_int16 tmp[FRAME_SIZE] = {0};
  float speech_gain=1;
  float old_speech_gain = 1;
  int one_pass_completed = 0;
  LPCNetEncState *st;
  float noise_std=0;
  int training = -1;
  int burg = 0;
  int pitch = 0;
  FILE *fnoise = NULL;
  float noise_gain = 0;
  long noise_size=0;
  int arch;
  srand(getpid());
  arch = opus_select_arch();
  st = lpcnet_encoder_create();
  if (st == NULL) {
    fprintf(stderr, "Error allocating encoder state\n");
    exit(1);
  }
  argv0=argv[0];
  if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
      burg = 1;
      training = 1;
  }
  else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
      burg = 1;
      training = 0;
  }
  else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
      pitch = 1;
      training = 1;
      fnoise = fopen(argv[2], "rb");
      /* The noise file is mandatory in this mode; seeking on a NULL stream
         would crash. */
      if (fnoise == NULL) {
        fprintf(stderr,"Error opening noise file: %s\n", argv[2]);
        exit(1);
      }
      if (fseek(fnoise, 0, SEEK_END) != 0 || (noise_size = ftell(fnoise)) < 0) {
        fprintf(stderr,"Error determining size of noise file: %s\n", argv[2]);
        exit(1);
      }
      fseek(fnoise, 0, SEEK_SET);
      /* Drop the noise argument so that the remaining arguments line up with
         the other modes. */
      argv++;
  }
  else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
      pitch = 1;
      training = 0;
  }
  else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
  else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
  if (training == -1) {
    fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
    fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv0);
    return 1;
  }
  /* Raw PCM is binary data: text mode would corrupt it on platforms that
     translate line endings. */
  f1 = fopen(argv[2], "rb");
  if (f1 == NULL) {
    fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
    exit(1);
  }
  ffeat = fopen(argv[3], "wb");
  if (ffeat == NULL) {
    fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
    exit(1);
  }
  if (training && !pitch) {
    fpcm = fopen(argv[4], "wb");
    if (fpcm == NULL) {
      fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
      exit(1);
    }
  }
  while (1) {
    size_t ret;
    ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
    if (feof(f1) || ret != FRAME_SIZE) {
      /* Test modes stop at the end of the input; training modes loop it. */
      if (!training) break;
      rewind(f1);
      ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
      if (ret != FRAME_SIZE) {
        fprintf(stderr, "error reading\n");
        exit(1);
      }
      one_pass_completed = 1;
    }
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
    if (training && ++gain_change_count > 2821) {
      float tmp1, tmp2;
      speech_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()&1) speech_gain = -speech_gain;
      if (rand()%20==0) speech_gain *= .01;
      if (!pitch && rand()%100==0) speech_gain = 0;
      gain_change_count = 0;
      rand_resp(a_sig, b_sig);
      tmp1 = rand()/(float)RAND_MAX;
      tmp2 = rand()/(float)RAND_MAX;
      noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
      if (fnoise != NULL) {
        long pos;
        /* Randomize the fraction because rand() only gives us 31 bits. */
        float frac_pos = rand()/(float)RAND_MAX;
        pos = (long)(frac_pos*noise_size);
        /* 32-bit alignment. */
        pos = pos/4 * 4;
        if (pos > noise_size-500000) pos = noise_size-500000;
        /* Noise files shorter than 500000 bytes would give a negative offset. */
        if (pos < 0) pos = 0;
        noise_gain = pow(10., (-15+(rand()%40))/20.);
        if (rand()%10==0) noise_gain = 0;
        fseek(fnoise, pos, SEEK_SET);
      }
    }
    if (fnoise != NULL) {
      opus_int16 noise[FRAME_SIZE];
      ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
      /* A short read (end of the noise file) leaves the tail of the buffer
         unwritten; treat the missing samples as silence. */
      for (i=(int)ret;i<FRAME_SIZE;i++) noise[i] = 0;
      for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
    }
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
    /* Cross-fade from the previous gain to the current one over the frame. */
    for (i=0;i<FRAME_SIZE;i++) {
      float g;
      float f = (float)i/FRAME_SIZE;
      g = f*speech_gain + (1-f)*old_speech_gain;
      x[i] *= g;
    }
    if (burg) {
      float ceps[2*NB_BANDS];
      burg_cepstral_analysis(ceps, x);
      fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
    }
    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
    compute_frame_features(st, x, arch);
    if (fpcm) {
        compute_noise(noisebuf, noise_std);
    }
    if (pitch) {
      signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
      for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
        pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
      }
      for (i=0;i<PITCH_IF_FEATURES;i++) {
        pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
      }
      fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
    } else {
      fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
    }
    /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
    if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
    /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
    old_speech_gain = speech_gain;
    count++;
  }
  fclose(f1);
  fclose(ffeat);
  if (fpcm) fclose(fpcm);
  if (fnoise) fclose(fnoise);
  lpcnet_encoder_destroy(st);
  return 0;
}

225
src/libs/opus/dnn/fargan.c Normal file
View File

@ -0,0 +1,225 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "fargan.h"
#include "os_support.h"
#include "freq.h"
#include "fargan_data.h"
#include "lpcnet.h"
#include "pitch.h"
#include "nnet.h"
#include "lpcnet_private.h"
#include "cpu_support.h"
#define FARGAN_FEATURES (NB_FEATURES)
/* Computes the conditioning vector `cond` for one frame.
   The input to the conditioning network is the NB_FEATURES feature values
   followed by one row of the pitch-embedding table; the row index is
   period-32 clamped to [0, 223]. That vector goes through fdense1 (tanh),
   fconv1 (tanh, history kept in st->cond_conv1_state) and fdense2 (tanh). */
static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
{
  FARGAN *model = &st->model;
  const int embed_row = IMAX(0,IMIN(period-32, 223));
  const float *pitch_embedding = &model->cond_net_pembed.float_weights[embed_row*COND_NET_PEMBED_OUT_SIZE];
  float dense_in[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
  float conv1_in[COND_NET_FCONV1_IN_SIZE];
  float fdense2_in[COND_NET_FCONV1_OUT_SIZE];

  /* The local buffer sizes must agree with the loaded model. */
  celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == model->cond_net_fdense1.nb_inputs);
  celt_assert(COND_NET_FCONV1_IN_SIZE == model->cond_net_fdense1.nb_outputs);
  celt_assert(COND_NET_FCONV1_OUT_SIZE == model->cond_net_fconv1.nb_outputs);

  /* dense_in = [ features | pitch embedding ] */
  OPUS_COPY(dense_in, features, NB_FEATURES);
  OPUS_COPY(&dense_in[NB_FEATURES], pitch_embedding, COND_NET_PEMBED_OUT_SIZE);

  compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH, st->arch);
  compute_generic_conv1d(&model->cond_net_fconv1, fdense2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
  compute_generic_dense(&model->cond_net_fdense2, cond, fdense2_in, ACTIVATION_TANH, st->arch);
}
/* In-place first-order recursive filter over one subframe:
   pcm[n] += FARGAN_DEEMPHASIS * (previous output sample).
   *deemph_mem carries the last output sample from one call to the next. */
static void fargan_deemphasis(float *pcm, float *deemph_mem) {
  int n;
  float last = *deemph_mem;
  for (n=0;n<FARGAN_SUBFRAME_SIZE;n++) {
    pcm[n] += FARGAN_DEEMPHASIS * last;
    last = pcm[n];
  }
  *deemph_mem = last;
}
/* Synthesizes one subframe (FARGAN_SUBFRAME_SIZE samples) into `pcm`.
   st:     synthesis state; pitch_buf, fwc0_mem, the three GRU states and
           deemph_mem are all updated.
   pcm:    output, FARGAN_SUBFRAME_SIZE samples (after de-emphasis).
   cond:   this subframe's slice of the conditioning vector.
   period: pitch period in samples, used to fetch the pitch prediction from
           st->pitch_buf. */
static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
{
  int i, pos;
  float fwc0_in[SIG_NET_INPUT_SIZE];
  float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float pred[FARGAN_SUBFRAME_SIZE+4];
  float prev[FARGAN_SUBFRAME_SIZE];
  float pitch_gate[4];
  float gain;
  float gain_1;
  /* Concatenation of the three gated GRU outputs, the gated conv output, the
     gated pitch prediction and the previous subframe. Sized to exactly what
     is written below instead of an arbitrary 10000-float (~40 kB) buffer. */
  float skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
  FARGAN *model;
  celt_assert(st->cont_initialized);
  model = &st->model;
  /* The skip dense layer must not read past the end of skip_cat. */
  celt_assert(model->sig_net_skip_dense.nb_inputs <= (int)(sizeof(skip_cat)/sizeof(skip_cat[0])));

  /* Per-subframe gain: the network predicts a log-gain from the conditioning. */
  compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
  gain = exp(gain);
  gain_1 = 1.f/(1e-5f + gain);

  /* Pitch prediction: gain-normalized, clamped to [-1,1], read starting
     period+2 samples before the end of pitch_buf. When the read position
     reaches the end of the buffer it steps back by one period. pred[] has 2
     extra samples on each side of the subframe (pred[i+2] lines up with
     output sample i). */
  pos = PITCH_MAX_PERIOD-period-2;
  for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
    pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)]));
    pos++;
    if (pos == PITCH_MAX_PERIOD) pos -= period;
  }
  /* Previous subframe, also gain-normalized and clamped to [-1,1]. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN16(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i]));

  /* fwc0_in = [ cond | pred | prev ] */
  OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
  celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
  compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);

  /* Four sigmoid gates, one per place where the pitch prediction is injected. */
  compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);

  /* Each GRU input is [ previous layer output | gated pred | prev ]. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
  OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
  compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
  OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
  compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
  OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);

  /* skip_cat = [ glu(gru1) | glu(gru2) | glu(gru3) | glu(fwc0) | gated pred | prev ] */
  compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);
  OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
  compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);

  /* Output samples, then undo the gain normalization. */
  compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;

  /* Shift the history by one subframe and append the new samples. This is
     done before de-emphasis, so pitch_buf holds the un-de-emphasized signal. */
  OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
  fargan_deemphasis(pcm, &st->deemph_mem);
}
/* Primes the synthesis state from existing signal so that synthesis can
   continue from it.
   pcm0:      FARGAN_CONT_SAMPLES samples of past signal.
   features0: 5 consecutive feature frames of NB_FEATURES floats each.
   On return st->cont_initialized is set, which the synthesis functions
   assert on. */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
{
  int frame, n, sub;
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  float x0[FARGAN_CONT_SAMPLES];
  float dummy[FARGAN_SUBFRAME_SIZE];
  int period=0;

  /* Pre-load features. Each pass overwrites cond; st->last_period ends up
     holding the period of the second-to-last frame. */
  for (frame=0;frame<5;frame++) {
    const float *features = features0 + frame*NB_FEATURES;
    st->last_period = period;
    period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
    compute_fargan_cond(st, cond, features, period);
  }

  /* x0[n] = pcm0[n] - FARGAN_DEEMPHASIS*pcm0[n-1]; the first sample has no
     predecessor and is set to zero. */
  x0[0] = 0;
  for (n=1;n<FARGAN_CONT_SAMPLES;n++) {
    x0[n] = pcm0[n] - FARGAN_DEEMPHASIS*pcm0[n-1];
  }

  /* Seed the end of the pitch buffer with the first frame of x0. */
  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], x0, FARGAN_FRAME_SIZE);
  st->cont_initialized = 1;

  /* Run the network over the second frame to warm up its internal state.
     The generated samples are discarded and the newest subframe of pitch_buf
     is overwritten with the matching subframe of x0. */
  for (sub=0;sub<FARGAN_NB_SUBFRAMES;sub++) {
    float *newest = &st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE];
    run_fargan_subframe(st, dummy, &cond[sub*FARGAN_COND_SIZE], st->last_period);
    OPUS_COPY(newest, &x0[FARGAN_FRAME_SIZE+sub*FARGAN_SUBFRAME_SIZE], FARGAN_SUBFRAME_SIZE);
  }
  st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
}
/* Zeroes the whole state, selects the CPU architecture and, unless
   USE_WEIGHTS_FILE is defined, initializes the model from the built-in
   fargan_arrays. With USE_WEIGHTS_FILE the model is left zeroed. */
void fargan_init(FARGANState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
  st->arch = opus_select_arch();
#ifndef USE_WEIGHTS_FILE
  ret = init_fargan(&st->model, fargan_arrays);
#endif
  celt_assert(ret == 0);
  /* Avoid a set-but-unused warning when celt_assert() compiles to nothing. */
  (void)ret;
}
/* Initializes st->model from a serialized weights blob.
   data/len: the blob and its size in bytes.
   Returns 0 on success, -1 if the blob could not be parsed or the model
   could not be initialized from it. */
int fargan_load_model(FARGANState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Never hand a missing array list to init_fargan().
     NOTE(review): assumes parse_weights() leaves *list NULL when it fails;
     confirm against its implementation. */
  if (list == NULL) return -1;
  ret = init_fargan(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
/* Synthesizes one frame (FARGAN_NB_SUBFRAMES subframes) into pcm from one
   feature vector. The conditioning is computed with the new frame's period,
   but the subframes run with the period saved from the previous call; the
   new period is stored for the next call. */
static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
{
  int sf;
  int new_period;
  float cond[COND_NET_FDENSE2_OUT_SIZE];

  celt_assert(st->cont_initialized);

  new_period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
  compute_fargan_cond(st, cond, features, new_period);
  for (sf=0;sf<FARGAN_NB_SUBFRAMES;sf++) {
    run_fargan_subframe(st, pcm + sf*FARGAN_SUBFRAME_SIZE, cond + sf*FARGAN_COND_SIZE, st->last_period);
  }
  st->last_period = new_period;
}
/* Public float entry point: synthesizes one frame into pcm from one feature
   vector. Forwards directly to fargan_synthesize_impl(). */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
{
fargan_synthesize_impl(st, pcm, features);
}
/* 16-bit entry point: synthesizes one frame in float, then scales by 32768,
   clamps to [-32767, 32767] and rounds to the nearest integer.
   pcm must have room for FARGAN_FRAME_SIZE samples. */
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
{
  int i;
  float fpcm[FARGAN_FRAME_SIZE];
  fargan_synthesize(st, fpcm, features);
  /* Iterate over the size of fpcm itself rather than a separately defined
     frame-size macro, so the loop can never run past the local buffer. */
  for (i=0;i<FARGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
}

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FARGAN_H
#define FARGAN_H
#include "freq.h"
#include "fargan_data.h"
#include "pitchdnn.h"
/* Number of past PCM samples fargan_cont() reads from its pcm0 argument. */
#define FARGAN_CONT_SAMPLES 320
/* A frame is synthesized as FARGAN_NB_SUBFRAMES subframes of
   FARGAN_SUBFRAME_SIZE samples each. */
#define FARGAN_NB_SUBFRAMES 4
#define FARGAN_SUBFRAME_SIZE 40
#define FARGAN_FRAME_SIZE (FARGAN_NB_SUBFRAMES*FARGAN_SUBFRAME_SIZE)
/* Size of the per-subframe slice of the frame conditioning vector. */
#define FARGAN_COND_SIZE (COND_NET_FDENSE2_OUT_SIZE/FARGAN_NB_SUBFRAMES)
/* Coefficient of the first-order de-emphasis applied to the output (and of
   the matching pre-emphasis applied in fargan_cont()). */
#define FARGAN_DEEMPHASIS 0.85f
/* Signal network input: conditioning slice, pitch prediction
   (FARGAN_SUBFRAME_SIZE+4 samples) and previous subframe. */
#define SIG_NET_INPUT_SIZE (FARGAN_COND_SIZE+2*FARGAN_SUBFRAME_SIZE+4)
/* Size of the history buffer (fwc0_mem) of the first signal-network conv. */
#define SIG_NET_FWC0_STATE_SIZE (2*SIG_NET_INPUT_SIZE)
#define FARGAN_MAX_RNN_NEURONS SIG_NET_GRU1_OUT_SIZE
/* Complete synthesis state. Must be set up with fargan_init() and primed
   with fargan_cont() before calling the synthesis functions. */
typedef struct {
/* Network layers (weights). */
FARGAN model;
/* CPU architecture id, as returned by opus_select_arch(). */
int arch;
/* Set to 1 by fargan_cont(); asserted by the synthesis code. */
int cont_initialized;
/* Last output sample, memory of the de-emphasis filter. */
float deemph_mem;
/* Most recent PITCH_MAX_PERIOD synthesized samples, before de-emphasis. */
float pitch_buf[PITCH_MAX_PERIOD];
/* History of the conditioning network's conv1d layer. */
float cond_conv1_state[COND_NET_FCONV1_STATE_SIZE];
/* History of the signal network's first conv1d layer. */
float fwc0_mem[SIG_NET_FWC0_STATE_SIZE];
/* Recurrent states of the three GRU layers. */
float gru1_state[SIG_NET_GRU1_STATE_SIZE];
float gru2_state[SIG_NET_GRU2_STATE_SIZE];
float gru3_state[SIG_NET_GRU3_STATE_SIZE];
/* Pitch period computed for the previous frame. */
int last_period;
} FARGANState;
/* Clears the state and loads the built-in weights (unless USE_WEIGHTS_FILE). */
void fargan_init(FARGANState *st);
/* Loads weights from a blob of len bytes. Returns 0 on success, -1 on failure. */
int fargan_load_model(FARGANState *st, const void *data, int len);
/* Primes the state from FARGAN_CONT_SAMPLES samples of past signal (pcm0) and
   5 frames of features (features0). */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0);
/* Synthesizes one frame of float samples from one feature vector. */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features);
/* Same as fargan_synthesize(), with the output converted to 16-bit integers. */
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features);
#endif /* FARGAN_H */

104298
src/libs/opus/dnn/fargan_data.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,79 @@
#ifndef FARGAN_DATA_H
#define FARGAN_DATA_H
#include "nnet.h"
#define COND_NET_PEMBED_OUT_SIZE 12
#define COND_NET_FDENSE1_OUT_SIZE 64
#define COND_NET_FCONV1_OUT_SIZE 128
#define COND_NET_FCONV1_IN_SIZE 64
#define COND_NET_FCONV1_STATE_SIZE (64 * (2))
#define COND_NET_FCONV1_DELAY 1
#define COND_NET_FDENSE2_OUT_SIZE 320
#define SIG_NET_COND_GAIN_DENSE_OUT_SIZE 1
#define SIG_NET_FWC0_CONV_OUT_SIZE 192
#define SIG_NET_FWC0_GLU_GATE_OUT_SIZE 192
#define SIG_NET_GRU1_OUT_SIZE 160
#define SIG_NET_GRU1_STATE_SIZE 160
#define SIG_NET_GRU2_OUT_SIZE 128
#define SIG_NET_GRU2_STATE_SIZE 128
#define SIG_NET_GRU3_OUT_SIZE 128
#define SIG_NET_GRU3_STATE_SIZE 128
#define SIG_NET_GRU1_GLU_GATE_OUT_SIZE 160
#define SIG_NET_GRU2_GLU_GATE_OUT_SIZE 128
#define SIG_NET_GRU3_GLU_GATE_OUT_SIZE 128
#define SIG_NET_SKIP_GLU_GATE_OUT_SIZE 128
#define SIG_NET_SKIP_DENSE_OUT_SIZE 128
#define SIG_NET_SIG_DENSE_OUT_OUT_SIZE 40
#define SIG_NET_GAIN_DENSE_OUT_OUT_SIZE 4
/* All layers of the FARGAN model. The cond_net_* layers are used to compute
   the frame conditioning vector and the sig_net_* layers to generate the
   signal one subframe at a time. Filled in by init_fargan(). */
typedef struct {
LinearLayer cond_net_pembed;
LinearLayer cond_net_fdense1;
LinearLayer cond_net_fconv1;
LinearLayer cond_net_fdense2;
LinearLayer sig_net_cond_gain_dense;
LinearLayer sig_net_fwc0_conv;
LinearLayer sig_net_fwc0_glu_gate;
LinearLayer sig_net_gru1_input;
LinearLayer sig_net_gru1_recurrent;
LinearLayer sig_net_gru2_input;
LinearLayer sig_net_gru2_recurrent;
LinearLayer sig_net_gru3_input;
LinearLayer sig_net_gru3_recurrent;
LinearLayer sig_net_gru1_glu_gate;
LinearLayer sig_net_gru2_glu_gate;
LinearLayer sig_net_gru3_glu_gate;
LinearLayer sig_net_skip_glu_gate;
LinearLayer sig_net_skip_dense;
LinearLayer sig_net_sig_dense_out;
LinearLayer sig_net_gain_dense_out;
} FARGAN;
/* Sets up `model` from a list of weight arrays. Callers treat a return value
   of 0 as success and anything else as failure. */
int init_fargan(FARGAN *model, const WeightArray *arrays);
#endif /* FARGAN_DATA_H */

328
src/libs/opus/dnn/freq.c Normal file
View File

@ -0,0 +1,328 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include "burg.h"
#include <assert.h>
#include "os_support.h"
/* Note: evaluates its argument twice. */
#define SQUARE(x) ((x)*(x))
/* Band edges. Multiplied by WINDOW_SIZE_5MS they give the index of the first
   FFT bin of each band; the comment inside the table lists the frequency
   associated with each edge. */
static const opus_int16 eband5ms[] = {
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40
};
/* Per-band factor applied to the band energies in lpc_from_cepstrum(). */
static const float compensation[] = {
0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
};
/* Tables defined in another translation unit: FFT configuration, half of the
   analysis window, and the NB_BANDS x NB_BANDS DCT matrix. */
extern const kiss_fft_state kfft;
extern const float half_window[OVERLAP_SIZE];
extern const float dct_table[NB_BANDS*NB_BANDS];
static void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
{
int j;
int band_size;
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
for (j=0;j<band_size;j++) {
float tmp;
float frac = (float)j/band_size;
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
tmp = 1.f/(tmp + 1e-9);
sum[i] += (1-frac)*tmp;
sum[i+1] += frac*tmp;
}
}
sum[0] *= 2;
sum[NB_BANDS-1] *= 2;
for (i=0;i<NB_BANDS;i++)
{
bandE[i] = sum[i];
}
}
/* Computes p LPC coefficients and reflection coefficients from p+1
   autocorrelation values with a Levinson-Durbin-style recursion.
   Both output arrays are cleared first; if ac[0] is 0 they stay all-zero.
   The recursion stops early once the prediction error falls below
   .001*ac[0], leaving the remaining coefficients at zero.
   Returns the final prediction error.
   NOTE(review): SHR32/SHL32/MULT32_32_Q31 are defined elsewhere; their exact
   behaviour in this build is not visible here. */
static float lpcn_lpc(
opus_val16 *lpc, /* out: [0...p-1] LPC coefficients */
opus_val16 *rc, /* out: [0...p-1] reflection coefficients */
const opus_val32 *ac, /* in: [0...p] autocorrelation values */
int p
)
{
int i, j;
opus_val32 r;
opus_val32 error = ac[0];
OPUS_CLEAR(lpc, p);
OPUS_CLEAR(rc, p);
if (ac[0] != 0)
{
for (i = 0; i < p; i++) {
/* Sum up this iteration's reflection coefficient */
opus_val32 rr = 0;
for (j = 0; j < i; j++)
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
rr += SHR32(ac[i + 1],3);
r = -SHL32(rr,3)/error;
rc[i] = r;
/* Update LPC coefficients and total error */
lpc[i] = SHR32(r,3);
/* Update the existing coefficients in symmetric pairs (j, i-1-j), reading
   both old values before either is overwritten. */
for (j = 0; j < (i+1)>>1; j++)
{
opus_val32 tmp1, tmp2;
tmp1 = lpc[j];
tmp2 = lpc[i-1-j];
lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
}
error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
/* Bail out once we get 30 dB gain */
if (error<.001f*ac[0])
break;
}
}
return error;
}
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
{
int j;
int band_size;
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
for (j=0;j<band_size;j++) {
float tmp;
float frac = (float)j/band_size;
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
sum[i] += (1-frac)*tmp;
sum[i+1] += frac*tmp;
}
}
sum[0] *= 2;
sum[NB_BANDS-1] *= 2;
for (i=0;i<NB_BANDS;i++)
{
bandE[i] = sum[i];
}
}
/* Computes NB_BANDS cepstral coefficients for `len` samples of pcm from an
   LPC model estimated with silk_burg_analysis().
   pcm:           input samples, len <= FRAME_SIZE.
   burg_cepstrum: output, NB_BANDS coefficients.
   order:         LPC order, at most LPC_ORDER. */
static void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
int i;
float burg_in[FRAME_SIZE];
float burg_lpc[LPC_ORDER];
float x[WINDOW_SIZE];
float Eburg[NB_BANDS];
float g;
kiss_fft_cpx LPC[FREQ_SIZE];
float Ly[NB_BANDS];
float logMax = -2;
float follow = -2;
assert(order <= LPC_ORDER);
assert(len <= FRAME_SIZE);
/* First-order pre-emphasis; produces len-1 samples. */
for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
/* NOTE(review): g is presumably the residual energy returned by the Burg
   analysis; confirm against silk_burg_analysis(). */
g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
g /= len - 2*(order-1);
/* Build the filter [1, -a_1*.995, -a_2*.995^2, ...], zero-padded to the
   window size, and take its spectrum. */
OPUS_CLEAR(x, WINDOW_SIZE);
x[0] = 1;
for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
forward_transform(LPC, x);
/* Band energies of the inverse of that spectrum, scaled by g. */
compute_band_energy_inverse(Eburg, LPC);
for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
/* Log-domain energies, floored at 8 below the running maximum and at 2.5
   below a follower that decays by 2.5 per band. */
for (i=0;i<NB_BANDS;i++) {
Ly[i] = log10(1e-2+Eburg[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5, Ly[i]);
}
dct(burg_cepstrum, Ly);
/* Fixed offset on coefficient 0; lpc_from_cepstrum() adds 4 back. */
burg_cepstrum[0] += - 4;
}
/* Runs the Burg cepstral analysis separately on the two halves of a frame.
   x:    FRAME_SIZE input samples.
   ceps: 2*NB_BANDS outputs; the first NB_BANDS hold the average of the two
         half-frame cepstra, the next NB_BANDS hold (first half - second half). */
void burg_cepstral_analysis(float *ceps, const float *x) {
  int b;
  const int half = FRAME_SIZE/2;
  float *first = ceps;
  float *second = ceps + NB_BANDS;

  compute_burg_cepstrum(x, first, half, LPC_ORDER);
  compute_burg_cepstrum(x + half, second, half, LPC_ORDER);

  for (b=0;b<NB_BANDS;b++) {
    const float c0 = first[b];
    const float c1 = second[b];
    first[b] = .5*(c0+c1);
    second[b] = (c0-c1);
  }
}
/* Expands NB_BANDS per-band values into FREQ_SIZE per-bin values by linear
   interpolation between neighbouring bands. Bins at or above the last band
   edge are not written by the loop and are left at zero. */
static void interp_band_gain(float *g, const float *bandE) {
  int i;
  /* memset() takes a byte count: clear FREQ_SIZE floats, not FREQ_SIZE bytes,
     so that the bins the loop does not reach are really zero. */
  memset(g, 0, FREQ_SIZE*sizeof(*g));
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]*WINDOW_SIZE_5MS) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
/* out[i] = sqrt(2/NB_BANDS) * sum_j in[j]*dct_table[j*NB_BANDS + i].
   in and out both hold NB_BANDS values. */
void dct(float *out, const float *in) {
  int col;
  const double scale = sqrt(2./NB_BANDS);
  for (col=0;col<NB_BANDS;col++) {
    int row;
    float acc = 0;
    for (row=0;row<NB_BANDS;row++) {
      acc += in[row] * dct_table[row*NB_BANDS + col];
    }
    out[col] = acc*scale;
  }
}
/* Counterpart of dct() using the transposed table:
   out[i] = sqrt(2/NB_BANDS) * sum_j in[j]*dct_table[i*NB_BANDS + j]. */
static void idct(float *out, const float *in) {
  int row;
  const double scale = sqrt(2./NB_BANDS);
  for (row=0;row<NB_BANDS;row++) {
    int col;
    float acc = 0;
    const float *table_row = &dct_table[row*NB_BANDS];
    for (col=0;col<NB_BANDS;col++) {
      acc += in[col] * table_row[col];
    }
    out[row] = acc*scale;
  }
}
/* FFT of WINDOW_SIZE real samples. Only the first FREQ_SIZE bins are
   written to out. */
void forward_transform(kiss_fft_cpx *out, const float *in) {
  int n;
  kiss_fft_cpx tbuf[WINDOW_SIZE];
  kiss_fft_cpx fbuf[WINDOW_SIZE];
  /* Promote the real input to complex with a zero imaginary part. */
  for (n=0;n<WINDOW_SIZE;n++) {
    tbuf[n].r = in[n];
    tbuf[n].i = 0;
  }
  opus_fft(&kfft, tbuf, fbuf, 0);
  for (n=0;n<FREQ_SIZE;n++) {
    out[n] = fbuf[n];
  }
}
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
int i;
kiss_fft_cpx x[WINDOW_SIZE];
kiss_fft_cpx y[WINDOW_SIZE];
for (i=0;i<FREQ_SIZE;i++) {
x[i] = in[i];
}
for (;i<WINDOW_SIZE;i++) {
x[i].r = x[WINDOW_SIZE - i].r;
x[i].i = -x[WINDOW_SIZE - i].i;
}
opus_fft(&kfft, x, y, 0);
/* output in reverse order for IFFT. */
out[0] = WINDOW_SIZE*y[0].r;
for (i=1;i<WINDOW_SIZE;i++) {
out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
}
}
/* Derives LPC_ORDER LPC coefficients from NB_BANDS band energies.
   The band energies are interpolated to a per-bin spectrum, inverse
   transformed to obtain autocorrelation values, conditioned (noise floor and
   lag window) and passed to lpcn_lpc(). Returns lpcn_lpc()'s prediction
   error. */
static float lpc_from_bands(float *lpc, const float *Ex)
{
  int k;
  float autocorr[LPC_ORDER+1];
  float refl[LPC_ORDER];
  float spectrum[FREQ_SIZE];
  kiss_fft_cpx X_auto[FREQ_SIZE];
  float x_auto[WINDOW_SIZE];

  interp_band_gain(spectrum, Ex);
  spectrum[FREQ_SIZE-1] = 0;

  /* Real-valued spectrum: imaginary parts stay at zero. */
  OPUS_CLEAR(X_auto, FREQ_SIZE);
  for (k=0;k<FREQ_SIZE;k++) X_auto[k].r = spectrum[k];
  inverse_transform(x_auto, X_auto);

  for (k=0;k<=LPC_ORDER;k++) autocorr[k] = x_auto[k];
  /* -40 dB noise floor. */
  autocorr[0] += autocorr[0]*1e-4 + 320/12/38.;
  /* Lag windowing. */
  for (k=1;k<=LPC_ORDER;k++) autocorr[k] *= (1 - 6e-5*k*k);

  /* The reflection coefficients are computed but not used here. */
  return lpcn_lpc(lpc, refl, autocorr, LPC_ORDER);
}
/* Scales the LPC coefficients in place: lpc[i] *= gamma^(i+1) for
   i in [0, LPC_ORDER). */
void lpc_weighting(float *lpc, float gamma)
{
  float *coef;
  float *const end = lpc + LPC_ORDER;
  float weight = gamma;
  for (coef = lpc; coef != end; coef++)
  {
    *coef *= weight;
    weight *= gamma;
  }
}
/* Converts NB_BANDS cepstral coefficients to LPC_ORDER LPC coefficients.
   4 is added to coefficient 0, the result goes through idct(), each band is
   mapped through 10^x and multiplied by its compensation[] factor, and
   lpc_from_bands() does the rest. The input cepstrum is not modified.
   Returns the value returned by lpc_from_bands(). */
float lpc_from_cepstrum(float *lpc, const float *cepstrum)
{
  int b;
  float band_e[NB_BANDS];
  float ceps[NB_BANDS];

  /* Work on a copy so the caller's cepstrum stays untouched. */
  for (b=0;b<NB_BANDS;b++) ceps[b] = cepstrum[b];
  ceps[0] += 4;

  idct(band_e, ceps);
  for (b=0;b<NB_BANDS;b++) band_e[b] = pow(10.f, band_e[b])*compensation[b];

  return lpc_from_bands(lpc, band_e);
}
/* Applies the symmetric analysis window in place to WINDOW_SIZE samples:
   the first OVERLAP_SIZE samples are multiplied by half_window[] and the
   last OVERLAP_SIZE samples by the same values in mirrored order. */
void apply_window(float *x) {
  int k;
  for (k=0;k<OVERLAP_SIZE;k++) {
    const float w = half_window[k];
    x[k] *= w;
    x[WINDOW_SIZE - 1 - k] *= w;
  }
}

61
src/libs/opus/dnn/freq.h Normal file
View File

@ -0,0 +1,61 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FREQ_H
#define FREQ_H
#include "kiss_fft.h"
/* Order of the LPC filters computed by this module. */
#define LPC_ORDER 16
/* Coefficient of the first-order pre-emphasis filter. */
#define PREEMPHASIS (0.85f)
/* Sizes expressed in 5 ms units; the *_SIZE macros below convert them to
   samples at 80 samples per unit. */
#define FRAME_SIZE_5MS (2)
#define OVERLAP_SIZE_5MS (2)
#define TRAINING_OFFSET_5MS (1)
#define WINDOW_SIZE_5MS (FRAME_SIZE_5MS + OVERLAP_SIZE_5MS)
#define FRAME_SIZE (80*FRAME_SIZE_5MS)
#define OVERLAP_SIZE (80*OVERLAP_SIZE_5MS)
#define TRAINING_OFFSET (80*TRAINING_OFFSET_5MS)
#define WINDOW_SIZE (FRAME_SIZE + OVERLAP_SIZE)
/* Number of bins kept from the FFT of a real WINDOW_SIZE-sample window. */
#define FREQ_SIZE (WINDOW_SIZE/2 + 1)
/* Number of frequency bands (and of cepstral coefficients). */
#define NB_BANDS 18
#define NB_BANDS_1 (NB_BANDS - 1)
/* Energy of each of the NB_BANDS bands of the FREQ_SIZE-bin spectrum X. */
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X);
/* Burg cepstral analysis of FRAME_SIZE samples; writes 2*NB_BANDS values. */
void burg_cepstral_analysis(float *ceps, const float *x);
/* Applies the analysis window in place to WINDOW_SIZE samples. */
void apply_window(float *x);
/* DCT of NB_BANDS values. */
void dct(float *out, const float *in);
/* FFT of WINDOW_SIZE real samples; writes the first FREQ_SIZE bins. */
void forward_transform(kiss_fft_cpx *out, const float *in);
/* Converts NB_BANDS cepstral coefficients to LPC_ORDER LPC coefficients. */
float lpc_from_cepstrum(float *lpc, const float *cepstrum);
/* Scales lpc[i] by gamma^(i+1) in place. */
void lpc_weighting(float *lpc, float gamma);
#endif

39301
src/libs/opus/dnn/lace_data.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,133 @@
/* Auto generated from checkpoint lace_v2.pth (sha1: 41eaab33c6cbdb192d14f43c9f292856cab789e9) */
#ifndef LACE_DATA_H
#define LACE_DATA_H
#include "nnet.h"
#define LACE_PREEMPH 0.85f
#define LACE_FRAME_SIZE 80
#define LACE_OVERLAP_SIZE 40
#define LACE_NUM_FEATURES 93
#define LACE_PITCH_MAX 300
#define LACE_PITCH_EMBEDDING_DIM 64
#define LACE_NUMBITS_RANGE_LOW 50
#define LACE_NUMBITS_RANGE_HIGH 650
#define LACE_NUMBITS_EMBEDDING_DIM 8
#define LACE_COND_DIM 128
#define LACE_HIDDEN_FEATURE_DIM 96
#define LACE_NUMBITS_SCALE_0 1.0983514785766602f
#define LACE_NUMBITS_SCALE_1 2.0509142875671387f
#define LACE_NUMBITS_SCALE_2 3.5729939937591553f
#define LACE_NUMBITS_SCALE_3 4.478035926818848f
#define LACE_NUMBITS_SCALE_4 5.926519393920898f
#define LACE_NUMBITS_SCALE_5 7.152282238006592f
#define LACE_NUMBITS_SCALE_6 8.277412414550781f
#define LACE_NUMBITS_SCALE_7 8.926830291748047f
#define LACE_PITCH_EMBEDDING_OUT_SIZE 64
#define LACE_FNET_CONV1_OUT_SIZE 96
#define LACE_FNET_CONV1_IN_SIZE 173
#define LACE_FNET_CONV1_STATE_SIZE (173 * (0))
#define LACE_FNET_CONV1_DELAY 0
#define LACE_FNET_CONV2_OUT_SIZE 128
#define LACE_FNET_CONV2_IN_SIZE 384
#define LACE_FNET_CONV2_STATE_SIZE (384 * (1))
#define LACE_FNET_CONV2_DELAY 0
#define LACE_FNET_TCONV_KERNEL_SIZE 4
#define LACE_FNET_TCONV_STRIDE 4
#define LACE_FNET_TCONV_IN_CHANNELS 128
#define LACE_FNET_TCONV_OUT_CHANNELS 128
#define LACE_FNET_GRU_OUT_SIZE 128
#define LACE_FNET_GRU_STATE_SIZE 128
#define LACE_CF1_FILTER_GAIN_A 0.690776f
#define LACE_CF1_FILTER_GAIN_B 0.000000f
#define LACE_CF1_LOG_GAIN_LIMIT 1.151293f
#define LACE_CF1_KERNEL_SIZE 16
#define LACE_CF1_LEFT_PADDING 8
#define LACE_CF1_FRAME_SIZE 80
#define LACE_CF1_OVERLAP_SIZE 40
#define LACE_CF1_IN_CHANNELS 1
#define LACE_CF1_OUT_CHANNELS 1
#define LACE_CF1_NORM_P 2
#define LACE_CF1_FEATURE_DIM 128
#define LACE_CF1_MAX_LAG 301
#define LACE_CF1_KERNEL_OUT_SIZE 16
#define LACE_CF1_GAIN_OUT_SIZE 1
#define LACE_CF1_GLOBAL_GAIN_OUT_SIZE 1
#define LACE_CF2_FILTER_GAIN_A 0.690776f
#define LACE_CF2_FILTER_GAIN_B 0.000000f
#define LACE_CF2_LOG_GAIN_LIMIT 1.151293f
#define LACE_CF2_KERNEL_SIZE 16
#define LACE_CF2_LEFT_PADDING 8
#define LACE_CF2_FRAME_SIZE 80
#define LACE_CF2_OVERLAP_SIZE 40
#define LACE_CF2_IN_CHANNELS 1
#define LACE_CF2_OUT_CHANNELS 1
#define LACE_CF2_NORM_P 2
#define LACE_CF2_FEATURE_DIM 128
#define LACE_CF2_MAX_LAG 301
#define LACE_CF2_KERNEL_OUT_SIZE 16
#define LACE_CF2_GAIN_OUT_SIZE 1
#define LACE_CF2_GLOBAL_GAIN_OUT_SIZE 1
#define LACE_AF1_FILTER_GAIN_A 1.381551f
#define LACE_AF1_FILTER_GAIN_B 0.000000f
#define LACE_AF1_SHAPE_GAIN 1.000000f
#define LACE_AF1_KERNEL_SIZE 16
#define LACE_AF1_FRAME_SIZE 80
#define LACE_AF1_LEFT_PADDING 15
#define LACE_AF1_OVERLAP_SIZE 40
#define LACE_AF1_IN_CHANNELS 1
#define LACE_AF1_OUT_CHANNELS 1
#define LACE_AF1_NORM_P 2
#define LACE_AF1_FEATURE_DIM 128
#define LACE_AF1_KERNEL_OUT_SIZE 16
#define LACE_AF1_GAIN_OUT_SIZE 1
/* All layers of the LACE model, grouped by name prefix: pitch embedding,
   feature network (fnet), two cf* stages and one af1 stage.
   NOTE(review): presumably filled in by init_lacelayers(); confirm in the
   generated lace_data.c. */
typedef struct {
LinearLayer lace_pitch_embedding;
LinearLayer lace_fnet_conv1;
LinearLayer lace_fnet_conv2;
LinearLayer lace_fnet_tconv;
LinearLayer lace_fnet_gru_input;
LinearLayer lace_fnet_gru_recurrent;
LinearLayer lace_cf1_kernel;
LinearLayer lace_cf1_gain;
LinearLayer lace_cf1_global_gain;
LinearLayer lace_cf2_kernel;
LinearLayer lace_cf2_gain;
LinearLayer lace_cf2_global_gain;
LinearLayer lace_af1_kernel;
LinearLayer lace_af1_gain;
} LACELayers;
/* Sets up `model` from a list of weight arrays. */
int init_lacelayers(LACELayers *model, const WeightArray *arrays);
#endif /* LACE_DATA_H */

196
src/libs/opus/dnn/lossgen.c Normal file
View File

@ -0,0 +1,196 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* This packet loss simulator can be used independently of the Opus codebase.
To do that, you need to compile the following files:
dnn/lossgen.c
dnn/lossgen_data.c
with the following files needed as #include
dnn/lossgen_data.h
dnn/lossgen.h
dnn/nnet_arch.h
dnn/nnet.h
dnn/parse_lpcnet_weights.c (included despite being a C file)
dnn/vec_avx.h
dnn/vec.h
celt/os_support.h
celt/arch.h
celt/x86/x86_arch_macros.h
include/opus_defines.h
include/opus_types.h
Additionally, the code in dnn/lossgen_demo.c can be used to generate losses from
the command line.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "arch.h"
#include <math.h>
#include "lossgen.h"
#include "os_support.h"
#include "nnet.h"
#include "assert.h"
/* Disable RTCD for this. */
#define RTCD_ARCH c
/* Override assert to avoid undefined/redefined symbols. */
#undef celt_assert
#define celt_assert assert
/* Directly include the C files we need since the symbols won't be exposed if we link in a shared object. */
#include "parse_lpcnet_weights.c"
#include "nnet_arch.h"
#undef compute_linear
#undef compute_activation
/* Force the C version since the SIMD versions may be hidden. */
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
#define MAX_RNN_NEURONS_ALL IMAX(LOSSGEN_GRU1_STATE_SIZE, LOSSGEN_GRU2_STATE_SIZE)
/* These two functions are copied from nnet.c to make sure we don't have linking issues. */
/* One GRU step, updating `state` in place from input `in`.
   With N = recurrent_weights->nb_inputs, both layers produce 3*N outputs laid
   out as [ z | r | h ]:
     z, r  = sigmoid(W_in*in + W_rec*state)
     h     = tanh((W_in*in)_h + r * (W_rec*state)_h)
     state = z*state + (1-z)*h
   `in` must not alias `state`. `arch` is ignored by the forced-C macros. */
void compute_generic_gru_lossgen(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
  int k;
  int units;
  float gates[3*MAX_RNN_NEURONS_ALL];
  float recur[3*MAX_RNN_NEURONS_ALL];
  float *z_gate;
  float *r_gate;
  float *h_cand;

  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
  units = recurrent_weights->nb_inputs;
  z_gate = gates;
  r_gate = gates + units;
  h_cand = gates + 2*units;
  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
  celt_assert(in != state);

  compute_linear(input_weights, gates, in, arch);
  compute_linear(recurrent_weights, recur, state, arch);

  /* z and r: add the recurrent contribution, then sigmoid. */
  for (k=0;k<2*units;k++)
    gates[k] += recur[k];
  compute_activation(gates, gates, 2*units, ACTIVATION_SIGMOID, arch);

  /* h: the recurrent contribution is scaled by r before the tanh. */
  for (k=0;k<units;k++)
    h_cand[k] += recur[2*units+k]*r_gate[k];
  compute_activation(h_cand, h_cand, units, ACTIVATION_TANH, arch);

  /* Blend the old state and the candidate according to z. */
  for (k=0;k<units;k++)
    state[k] = z_gate[k]*state[k] + (1-z_gate[k])*h_cand[k];
}
/* Dense layer: applies the linear layer to `input`, then applies `activation`
   in place to the layer->nb_outputs results in `output`. */
void compute_generic_dense_lossgen(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
compute_linear(layer, output, input, arch);
compute_activation(output, output, layer->nb_outputs, activation, arch);
}
/* Draws one loss decision.
   The network input is [previous decision, percent_loss]; it runs through
   dense(tanh) -> GRU1 -> GRU2 -> dense(sigmoid) to get a probability, and the
   packet is declared lost when a rand() draw in [0,1] falls below it.
   Returns 1 for lost, 0 for received, and remembers it in st->last_loss. */
static int sample_loss_impl(
  LossGenState *st,
  float percent_loss)
{
  LossGen *model = &st->model;
  float input[2];
  float hidden[LOSSGEN_DENSE_IN_OUT_SIZE];
  float prob;
  int lost;

  input[0] = st->last_loss;
  input[1] = percent_loss;

  compute_generic_dense_lossgen(&model->lossgen_dense_in, hidden, input, ACTIVATION_TANH, 0);
  compute_generic_gru_lossgen(&model->lossgen_gru1_input, &model->lossgen_gru1_recurrent, st->gru1_state, hidden, 0);
  compute_generic_gru_lossgen(&model->lossgen_gru2_input, &model->lossgen_gru2_recurrent, st->gru2_state, st->gru1_state, 0);
  compute_generic_dense_lossgen(&model->lossgen_dense_out, &prob, st->gru2_state, ACTIVATION_SIGMOID, 0);

  lost = (float)rand()/RAND_MAX < prob;
  st->last_loss = lost;
  return lost;
}
/* Returns 1 if the next packet is lost, 0 otherwise.
   On the first call for a given state, 100 decisions are drawn and thrown
   away first, because the GRU states start at zero and the first outputs
   are not quite random. */
int sample_loss(
  LossGenState *st,
  float percent_loss)
{
  if (!st->used) {
    int warmup = 100;
    while (warmup-- > 0) sample_loss_impl(st, percent_loss);
    st->used = 1;
  }
  return sample_loss_impl(st, percent_loss);
}
/* Zeroes the state and, unless USE_WEIGHTS_FILE is defined, initializes the
   model from the built-in lossgen_arrays. */
void lossgen_init(LossGenState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
#ifndef USE_WEIGHTS_FILE
  ret = init_lossgen(&st->model, lossgen_arrays);
#endif
  celt_assert(ret == 0);
  /* Keeps ret "used" when the assertion compiles to nothing. */
  (void)ret;
}
/* Initializes st->model from a serialized weights blob.
   data/len: the blob and its size in bytes.
   Returns 0 on success, -1 if the blob could not be parsed or the model
   could not be initialized from it. */
int lossgen_load_model(LossGenState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Never hand a missing array list to init_lossgen().
     NOTE(review): assumes parse_weights() leaves *list NULL when it fails;
     confirm against its implementation. */
  if (list == NULL) return -1;
  ret = init_lossgen(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
/* Disabled stand-alone driver, kept for reference only (never compiled).
   It prints <length> loss decisions, one per line, passing <percentage>
   to sample_loss() unchanged. The header comment of this file points to
   dnn/lossgen_demo.c as the command-line tool to use instead. */
#if 0
#include <stdio.h>
int main(int argc, char **argv) {
int i, N;
float p;
LossGenState st;
if (argc!=3) {
fprintf(stderr, "usage: lossgen <percentage> <length>\n");
return 1;
}
lossgen_init(&st);
p = atof(argv[1]);
N = atoi(argv[2]);
for (i=0;i<N;i++) {
printf("%d\n", sample_loss(&st, p));
}
}
#endif

View File

@ -0,0 +1,55 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LOSSGEN_H
#define LOSSGEN_H
#include "lossgen_data.h"
/* Pitch period bounds and the derived lag count
   (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD = 224).
   NOTE(review): nothing else in this header uses these three macros. */
#define PITCH_MIN_PERIOD 32
#define PITCH_MAX_PERIOD 256
#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
/* Complete state of one loss generator; zeroed and set up by lossgen_init(). */
typedef struct {
/* Network layers, bound to their weights by init_lossgen(). */
LossGen model;
/* State vectors sized for the two GRU layers of the model. */
float gru1_state[LOSSGEN_GRU1_STATE_SIZE];
float gru2_state[LOSSGEN_GRU2_STATE_SIZE];
/* NOTE(review): presumably the previous sample_loss() decision -- confirm in lossgen.c. */
int last_loss;
/* NOTE(review): purpose not visible from this header; presumably flags that
   the state has already been sampled from -- confirm in lossgen.c. */
int used;
} LossGenState;
/* Zero the state and, unless USE_WEIGHTS_FILE is defined, bind the
   compiled-in weights. */
void lossgen_init(LossGenState *st);
/* Initialise the model from a weights blob of len bytes at data.
   Returns 0 on success, -1 on failure. */
int lossgen_load_model(LossGenState *st, const void *data, int len);
/* Draw the next loss sample. lossgen_demo passes its percentage argument
   scaled by 0.01 and prints the int result with "%d".
   NOTE(review): presumably the result is 1 for a lost packet and 0 otherwise,
   and percent_loss is a fraction in [0,1] -- confirm in lossgen.c. */
int sample_loss(
LossGenState *st,
float percent_loss);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
/* Auto generated from checkpoint lossgen2_2000.pth */
#ifndef LOSSGEN_DATA_H
#define LOSSGEN_DATA_H
#include "nnet.h"
#include "opus_types.h"
/* Layer dimensions of the generated model; the *_STATE_SIZE values size the
   GRU state arrays in LossGenState. */
#define LOSSGEN_DENSE_IN_OUT_SIZE 8
#define LOSSGEN_DENSE_OUT_OUT_SIZE 1
#define LOSSGEN_GRU1_OUT_SIZE 16
#define LOSSGEN_GRU1_STATE_SIZE 16
#define LOSSGEN_GRU2_OUT_SIZE 32
#define LOSSGEN_GRU2_STATE_SIZE 32
#define LOSSGEN_MAX_RNN_UNITS 32
/* The layers of the loss-generator network: an input dense layer, an output
   dense layer and the input/recurrent parts of two GRU layers. */
typedef struct {
LinearLayer lossgen_dense_in;
LinearLayer lossgen_dense_out;
LinearLayer lossgen_gru1_input;
LinearLayer lossgen_gru1_recurrent;
LinearLayer lossgen_gru2_input;
LinearLayer lossgen_gru2_recurrent;
} LossGen;
/* Bind the layers of model to the weight arrays in arrays.
   Callers in lossgen.c treat a return value of 0 as success. */
int init_lossgen(LossGen *model, const WeightArray *arrays);
#endif /* LOSSGEN_DATA_H */

View File

@ -0,0 +1,22 @@
#include <stdio.h>
#include <stdlib.h>
#include "lossgen.h"
/* lossgen_demo <percent_loss> <nb packets>
   Prints one sample_loss() result per line on stdout, <nb packets> lines in
   total. Returns 1 (after a usage message) on a wrong argument count and 0
   otherwise. */
int main(int argc, char **argv)
{
  LossGenState st;
  float percent;
  float scaled_loss;
  long remaining;
  if (argc != 3) {
    fprintf(stderr, "usage: %s <percent_loss> <nb packets>\n", argv[0]);
    return 1;
  }
  lossgen_init(&st);
  percent = atof(argv[1]);
  remaining = atol(argv[2]);
  /* The percentage argument is scaled by 0.01 before reaching sample_loss(). */
  scaled_loss = percent*0.01f;
  for (; remaining > 0; remaining--) {
    printf("%d\n", sample_loss(&st, scaled_loss));
  }
  return 0;
}

183
src/libs/opus/dnn/lpcnet.h Normal file
View File

@ -0,0 +1,183 @@
/* Copyright (c) 2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LPCNET_H_
#define LPCNET_H_
#include "opus_types.h"
/** Number of values in one feature vector as consumed by the PLC and FARGAN code. */
#define NB_FEATURES 20
/** Size of the full feature vector written by lpcnet_compute_single_frame_features();
 * the encoder stores its LPC coefficients in the entries after the first NB_FEATURES. */
#define NB_TOTAL_FEATURES 36
/** Number of audio samples in a feature frame (not for encoding/decoding). */
#define LPCNET_FRAME_SIZE (160)
/* Opaque state types; LPCNetEncState and LPCNetPLCState are defined in lpcnet_private.h. */
typedef struct LPCNetState LPCNetState;
typedef struct LPCNetDecState LPCNetDecState;
typedef struct LPCNetEncState LPCNetEncState;
typedef struct LPCNetPLCState LPCNetPLCState;
/** Gets the size of an <code>LPCNetDecState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_decoder_get_size(void);
/** Initializes a previously allocated decoder state
* The memory pointed to by st must be at least the size returned by lpcnet_decoder_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_decoder_create(),lpcnet_decoder_get_size()
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
* @retval 0 Success
*/
int lpcnet_decoder_init(LPCNetDecState *st);
/** Resets a synthesis state.
* NOTE(review): no definition of this function is visible next to this header; confirm it still exists.
* @param [in] lpcnet <tt>LPCNetState*</tt>: Synthesis state
*/
void lpcnet_reset(LPCNetState *lpcnet);
/** Allocates and initializes a decoder state.
* @returns The newly created state
*/
LPCNetDecState *lpcnet_decoder_create(void);
/** Frees an <code>LPCNetDecState</code> allocated by lpcnet_decoder_create().
* @param[in] st <tt>LPCNetDecState*</tt>: State to be freed.
*/
void lpcnet_decoder_destroy(LPCNetDecState *st);
/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
* @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
* @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
* @retval 0 Success
*/
int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
/** Gets the size of an <code>LPCNetEncState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_encoder_get_size(void);
/** Initializes a previously allocated encoder state
* The memory pointed to by st must be at least the size returned by lpcnet_encoder_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_encoder_create(),lpcnet_encoder_get_size()
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @retval 0 Success
*/
int lpcnet_encoder_init(LPCNetEncState *st);
/** Loads the encoder's pitch model from a weights blob.
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] data <tt>const void *</tt>: Start of the weights blob
* @param [in] len <tt>int</tt>: Size of the blob in bytes
* @returns The result of pitchdnn_load_model(); callers treat 0 as success.
*/
int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len);
/** Allocates and initializes an encoder state.
* @returns The newly created state
*/
LPCNetEncState *lpcnet_encoder_create(void);
/** Frees an <code>LPCNetEncState</code> allocated by lpcnet_encoder_create().
* @param[in] st <tt>LPCNetEncState*</tt>: State to be freed.
*/
void lpcnet_encoder_destroy(LPCNetEncState *st);
/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be encoded
* @param [out] buf <tt>unsigned char *</tt>: Compressed packet
* @retval 0 Success
*/
int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be analyzed
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: One feature vector
* @param [in] arch <tt>int</tt>: CPU architecture selector, e.g. the value returned by opus_select_arch()
* @retval 0 Success
*/
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
* Same as lpcnet_compute_single_frame_features() but takes float input; the input is copied, not modified.
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const float *</tt>: Input speech to be analyzed
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: One feature vector
* @param [in] arch <tt>int</tt>: CPU architecture selector, e.g. the value returned by opus_select_arch()
* @retval 0 Success
*/
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
/** Gets the size of an <code>LPCNetState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_get_size(void);
/** Initializes a previously allocated synthesis state
* The memory pointed to by st must be at least the size returned by lpcnet_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_create(),lpcnet_get_size()
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
* @retval 0 Success
*/
int lpcnet_init(LPCNetState *st);
/** Allocates and initializes a synthesis state.
* @returns The newly created state
*/
LPCNetState *lpcnet_create(void);
/** Frees an <code>LPCNetState</code> allocated by lpcnet_create().
* @param[in] st <tt>LPCNetState*</tt>: State to be freed.
*/
void lpcnet_destroy(LPCNetState *st);
/** Synthesizes speech from an LPCNet feature vector.
* This function returns no status.
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
* @param [in] features <tt>const float *</tt>: Feature vector to synthesize from
* @param [out] output <tt>opus_int16 *</tt>: Synthesized speech
* @param [in] N <tt>int</tt>: Number of samples to generate
*/
void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
/** Initializes a packet-loss-concealment state: selects the CPU architecture, initializes the
* embedded FARGAN and encoder states, binds the built-in PLC model unless USE_WEIGHTS_FILE is
* defined, then resets the running state.
* @retval 0 Success
*/
int lpcnet_plc_init(LPCNetPLCState *st);
/** Clears the running PLC state (FEC queue, PCM history, counters, network states) and
* re-initializes the embedded encoder. The PLC model, the FARGAN state, the loaded flag and
* the architecture selection are left untouched.
*/
void lpcnet_plc_reset(LPCNetPLCState *st);
/** Appends one frame (FRAME_SIZE samples) of PCM to the history buffer and clears the loss counter.
* @retval 0 Always
*/
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
/** Generates one frame (FRAME_SIZE samples) of concealment audio into pcm.
* A model must have been loaded beforehand (asserted).
* @retval 0 Always
*/
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
/** Queues one vector of NB_FEATURES FEC features. A NULL pointer queues nothing and instead
* increments the count of frames for which no FEC is available.
*/
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
/** Empties the FEC queue and clears the skip count. */
void lpcnet_plc_fec_clear(LPCNetPLCState *st);
/** NOTE(review): no definition is visible next to this header; presumably loads synthesis weights from a blob -- confirm. */
int lpcnet_load_model(LPCNetState *st, const void *data, int len);
/** Loads the PLC model, the encoder's pitch model and the FARGAN model from the same weights blob.
* @retval 0 Success (the state is then marked as loaded)
*/
int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len);
#endif

View File

@ -0,0 +1,217 @@
/* Copyright (c) 2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "arch.h"
#include "lpcnet.h"
#include "freq.h"
#include "os_support.h"
#include "fargan.h"
#include "cpu_support.h"
#ifdef USE_WEIGHTS_FILE
# if __unix__
# include <fcntl.h>
# include <sys/mman.h>
# include <unistd.h>
# include <sys/stat.h>
/* Map a weights file read-only into memory.
   mmap() is preferred over reading the file when available: it leads to
   better resource utilization, especially if several processes use the same
   file (the mapping is shared in cache).
   On success returns the mapping and stores the file size in *len; on any
   failure returns NULL and stores 0 in *len. Release with free_blob(). */
void *load_blob(const char *filename, int *len) {
  struct stat info;
  void *mapping;
  int size;
  int fd;
  *len = 0;
  if (stat(filename, &info)) return NULL;
  size = info.st_size;
  fd = open(filename, O_RDONLY);
  if (fd<0) return NULL;
  mapping = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
  /* The descriptor is not needed once the mapping attempt has been made. */
  close(fd);
  if (mapping == MAP_FAILED) return NULL;
  *len = size;
  return mapping;
}
/* Unmap a blob obtained from the mmap()-based load_blob(); NULL is ignored. */
void free_blob(void *blob, int len) {
  if (blob == NULL) return;
  munmap(blob, len);
}
# else
/* Read a whole weights file into a malloc()ed buffer (fallback used when the
   mmap()-based variant is not compiled in).
   filename: path of the blob.
   len:      receives the number of bytes actually read, or 0 on failure.
   Returns the buffer (release it with free_blob()), or NULL if the file
   cannot be opened, is empty, is too large for an int length, or memory is
   exhausted. */
void *load_blob(const char *filename, int *len) {
  FILE *file;
  void *data;
  long size;
  *len = 0;
  /* Binary mode: a text-mode stream may translate line endings or stop at a
     Ctrl-Z byte on platforms that distinguish the two modes. */
  file = fopen(filename, "rb");
  if (file == NULL)
  {
    perror("could not open blob file");
    return NULL;
  }
  fseek(file, 0L, SEEK_END);
  size = ftell(file);
  fseek(file, 0L, SEEK_SET);
  /* Reject empty files, ftell() failures and sizes that do not survive the
     conversion to the int used for *len. */
  if (size <= 0 || (long)(int)size != size) {
    fclose(file);
    return NULL;
  }
  data = malloc((size_t)size);
  if (data != NULL) {
    *len = (int)fread(data, 1, (size_t)size, file);
  }
  /* The stream is no longer needed whether or not the read succeeded. */
  fclose(file);
  return data;
}
/* Release a blob obtained from the malloc()-based load_blob().
   len is accepted only so both free_blob() variants share one signature. */
void free_blob(void *blob, int len) {
  (void)len;
  free(blob);
}
# endif
#endif
/* Operating modes selected by the first command-line argument in main().
   MODE_FWGAN_SYNTHESIS is not selected by any option handled in main(). */
#define MODE_FEATURES 2
/*#define MODE_SYNTHESIS 3*/
#define MODE_ADDLPC 5
#define MODE_FWGAN_SYNTHESIS 6
#define MODE_FARGAN_SYNTHESIS 7
/* Print the command-line synopsis to stderr and terminate with exit status 1.
   Only the three modes that main() actually parses are listed. */
void usage(void) {
  fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
  fprintf(stderr, " lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n");
  fprintf(stderr, " lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
  exit(1);
}
/* lpcnet_demo entry point.
     -features <input.pcm> <features.f32>
         reads 16-bit PCM in frames of LPCNET_FRAME_SIZE samples and writes
         NB_TOTAL_FEATURES floats per frame;
     -fargan-synthesis <features.f32> <output.pcm>
         reads NB_TOTAL_FEATURES floats per frame and writes synthesized
         16-bit PCM;
     -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>
         recomputes the LPC part of each 36-float vector from its cepstrum.
   Exits with status 1 on bad arguments, unopenable files, allocation failure
   or (with USE_WEIGHTS_FILE) a model that cannot be loaded; returns 0
   otherwise. */
int main(int argc, char **argv) {
  int mode=0;
  int arch;
  FILE *fin, *fout;
#ifdef USE_WEIGHTS_FILE
  int len;
  void *data;
  const char *filename = "weights_blob.bin";
#endif
  arch = opus_select_arch();
  if (argc < 4) usage();
  if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
  else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
  else if (strcmp(argv[1], "-addlpc") == 0){
    mode=MODE_ADDLPC;
  } else {
    usage();
  }
  if (argc != 4) usage();
  fin = fopen(argv[2], "rb");
  if (fin == NULL) {
    fprintf(stderr, "Can't open %s\n", argv[2]);
    exit(1);
  }
  fout = fopen(argv[3], "wb");
  if (fout == NULL) {
    fprintf(stderr, "Can't open %s\n", argv[3]);
    exit(1);
  }
#ifdef USE_WEIGHTS_FILE
  data = load_blob(filename, &len);
#endif
  if (mode == MODE_FEATURES) {
    LPCNetEncState *net;
    net = lpcnet_encoder_create();
    if (net == NULL) {
      fprintf(stderr, "Can't allocate encoder state\n");
      exit(1);
    }
    while (1) {
      float features[NB_TOTAL_FEATURES];
      opus_int16 pcm[LPCNET_FRAME_SIZE];
      size_t ret;
      ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
      if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
      lpcnet_compute_single_frame_features(net, pcm, features, arch);
      fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
    }
    lpcnet_encoder_destroy(net);
  } else if (mode == MODE_FARGAN_SYNTHESIS) {
    FARGANState fargan;
    size_t ret, i;
    /* Zero-initialised so that an input shorter than five frames never hands
       indeterminate values to fargan_cont(). */
    float in_features[5*NB_TOTAL_FEATURES] = {0};
    float zeros[320] = {0};
    fargan_init(&fargan);
#ifdef USE_WEIGHTS_FILE
    if (fargan_load_model(&fargan, data, len) != 0) {
      fprintf(stderr, "Can't load model from %s\n", filename);
      exit(1);
    }
#endif
    /* uncomment the following to align with Python code */
    /*ret = fread(&in_features[0], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);*/
    /* Each read fetches NB_TOTAL_FEATURES floats but advances the destination
       by only NB_FEATURES, so the next read overwrites the extra entries and
       the first 5*NB_FEATURES floats end up holding five packed vectors. */
    for (i=0;i<5;i++) {
      ret = fread(&in_features[i*NB_FEATURES], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
      if (ret != NB_TOTAL_FEATURES) {
        fprintf(stderr, "warning: fewer than 5 feature frames available for fargan_cont()\n");
        break;
      }
    }
    fargan_cont(&fargan, zeros, in_features);
    while (1) {
      float features[NB_FEATURES];
      float fpcm[LPCNET_FRAME_SIZE];
      opus_int16 pcm[LPCNET_FRAME_SIZE];
      ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
      if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
      OPUS_COPY(features, in_features, NB_FEATURES);
      fargan_synthesize(&fargan, fpcm, features);
      /* Scale to 16-bit range, clamp to +/-32767 and round to nearest. */
      for (i=0;i<LPCNET_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
      fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
    }
  } else if (mode == MODE_ADDLPC) {
    float features[36];
    size_t ret;
    while (1) {
      ret = fread(features, sizeof(features[0]), 36, fin);
      if (ret != 36 || feof(fin)) break;
      /* The LPC coefficients live at offset 20, after the other features. */
      lpc_from_cepstrum(&features[20], &features[0]);
      fwrite(features, sizeof(features[0]), 36, fout);
    }
  } else {
    fprintf(stderr, "unknown action\n");
  }
  fclose(fin);
  fclose(fout);
#ifdef USE_WEIGHTS_FILE
  free_blob(data, len);
#endif
  return 0;
}

View File

@ -0,0 +1,230 @@
/* Copyright (c) 2017-2019 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
#include "common.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include <assert.h>
#include "lpcnet_private.h"
#include "lpcnet.h"
#include "os_support.h"
#include "_kiss_fft_guts.h"
#include "celt_lpc.h"
#include "mathops.h"
/* Size in bytes of an LPCNetEncState, for callers that allocate the state themselves. */
int lpcnet_encoder_get_size(void) {
return sizeof(LPCNetEncState);
}
/* Zero the whole encoder state, then initialise its embedded pitch DNN.
   Always returns 0. */
int lpcnet_encoder_init(LPCNetEncState *st) {
memset(st, 0, sizeof(*st));
pitchdnn_init(&st->pitchdnn);
return 0;
}
/* Load the pitch DNN weights from a blob of len bytes at data.
   Returns whatever pitchdnn_load_model() returns; callers treat 0 as success. */
int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
return pitchdnn_load_model(&st->pitchdnn, data, len);
}
/* Allocate and initialise an encoder state.
   Returns the new state, to be released with lpcnet_encoder_destroy(), or
   NULL if the allocation fails. */
LPCNetEncState *lpcnet_encoder_create(void) {
  LPCNetEncState *st;
  st = opus_alloc(lpcnet_encoder_get_size());
  /* lpcnet_encoder_init() starts by clearing *st, so it must never see NULL. */
  if (st == NULL) return NULL;
  lpcnet_encoder_init(st);
  return st;
}
/* Release an encoder state obtained from lpcnet_encoder_create(). */
void lpcnet_encoder_destroy(LPCNetEncState *st) {
opus_free(st);
}
/* Window and transform one frame.
   The analysis buffer is the OVERLAP_SIZE samples kept from the previous call
   followed by the FRAME_SIZE samples of 'in'; the last OVERLAP_SIZE samples of
   'in' are saved for the next call. X receives the transform of the windowed
   buffer and Ex the band energies computed from X. */
static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
  float windowed[WINDOW_SIZE];
  float *overlap = st->analysis_mem;
  OPUS_COPY(windowed, overlap, OVERLAP_SIZE);
  OPUS_COPY(windowed+OVERLAP_SIZE, in, FRAME_SIZE);
  OPUS_COPY(overlap, in+(FRAME_SIZE-OVERLAP_SIZE), OVERLAP_SIZE);
  apply_window(windowed);
  forward_transform(X, windowed);
  lpcn_compute_band_energy(Ex, X);
}
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
int i;
float mem0, mem1;
mem0 = mem[0];
mem1 = mem[1];
for (i=0;i<N;i++) {
float xi, yi, mem00;
xi = x[i];
yi = x[i] + mem0;
mem00 = mem0;
/* Original code:
mem0 = mem1 + (b[0]*xi - a[0]*yi);
mem1 = (b[1]*xi - a[1]*yi);
Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
*/
mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
y[i] = yi;
}
mem[0] = mem0;
mem[1] = mem1;
}
/* Base-10 logarithm from celt_log2(): 0.3010299957 is log10(2). */
#define celt_log10(x) (0.3010299957f*celt_log2(x))
/* Analyse one frame of FRAME_SIZE samples and fill st->features.
   st:   encoder state; the analysis memories, pitch buffers, LPC and feature
         arrays in it are all updated.
   in:   FRAME_SIZE input samples (the caller in this file applies
         pre-emphasis first).
   arch: architecture selector forwarded to the CELT and pitch-DNN routines.
   Results: st->features[0..NB_BANDS-1] hold the DCT of the log band energies,
   st->features[NB_BANDS] the pitch DNN output, st->features[NB_BANDS+1] the
   mapped pitch correlation minus 0.5, and the LPC_ORDER entries from
   NB_BANDS+2 on the LPC coefficients. */
void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
float aligned_in[FRAME_SIZE];
int i;
float Ly[NB_BANDS];
float follow, logMax;
kiss_fft_cpx X[FREQ_SIZE];
float Ex[NB_BANDS];
float xcorr[PITCH_MAX_PERIOD];
float ener0;
float ener;
float x[FRAME_SIZE+LPC_ORDER];
float frame_corr;
float xy, xx, yy;
int pitch;
float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
/* [b,a]=ellip(2, 2, 20, 1200/8000); */
static const float lp_b[2] = {-0.84946f, 1.f};
static const float lp_a[2] = {-1.54220f, 0.70781f};
/* The first TRAINING_OFFSET samples of aligned_in come from the end of the
   previous frame; this must be read before frame_analysis() overwrites
   analysis_mem. */
OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
frame_analysis(st, X, Ex, in);
/* Bin 0 contributes only a clamped log-energy term. */
st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
/* Every other bin contributes three values: the unit-normalised product of
   the bin with the previous frame's bin (presumably using the conjugate --
   confirm C_MULC in _kiss_fft_guts.h) and its clamped log energy. */
for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
kiss_fft_cpx prod;
float norm_1;
C_MULC(prod, X[i], st->prev_if[i]);
norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
C_MULBYSCALAR(prod, norm_1);
st->if_features[3*i-2] = prod.r;
st->if_features[3*i-1] = prod.i;
st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
}
OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
/*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
/* Log band energies, floored at 8 below the running maximum and at a
   follower that drops by 2.5 per band. */
logMax = -2;
follow = -2;
for (i=0;i<NB_BANDS;i++) {
Ly[i] = celt_log10(1e-2f+Ex[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5f, Ly[i]);
}
dct(st->features, Ly);
st->features[0] -= 4;
lpc_from_cepstrum(st->lpc, st->features);
for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
/* Shift both pitch buffers by one frame so the newest PITCH_MAX_PERIOD
   samples of history precede the frame written below. */
OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
/* x is the aligned frame preceded by LPC_ORDER samples of history, as
   needed by the FIR filter. */
OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
/* exc_buf is the filtered signal plus 0.7 times its previous sample. */
for (i=0;i<FRAME_SIZE;i++) {
st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
/*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
}
/* In-place filtering of the new lp_buf samples with the lp_b/lp_a design above. */
biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
{
double ener1;
float *buf = st->exc_buf;
celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
/*printf("%f\n", st->frame_weight[sub]);*/
/* ener1 is the energy of the FRAME_SIZE window starting at buf[i]; it is
   slid by one sample per iteration instead of being recomputed. */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
ener = 1 + ener0 + ener1;
st->xcorr_features[i] = 2*xcorr[i];
ener_norm[i] = ener;
ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
/*printf("%f ", st->xcorr_features[i]);*/
}
/* Split in a separate loop so the compiler can vectorize it */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
st->xcorr_features[i] /= ener_norm[i];
}
/*printf("\n");*/
}
st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
/* Pitch period in samples: 256 / 2^(dnn_pitch+1.5), rounded to nearest.
   NOTE(review): the indexing below assumes pitch <= PITCH_MAX_PERIOD; the
   range of dnn_pitch is not visible here -- confirm. */
pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
/*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
/* Normalised correlation at the estimated pitch, then mapped through
   log(1+exp(5c))/log(1+exp(5)). */
frame_corr = xy/sqrt(1+xx*yy);
frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
st->features[NB_BANDS] = st->dnn_pitch;
st->features[NB_BANDS + 1] = frame_corr-.5f;
}
/* First-order pre-emphasis: y[i] = x[i] - coef*x[i-1].
   mem carries -coef times the last input sample across calls.
   y may be the same buffer as x: each input sample is read before the
   corresponding output is stored. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
  int k = 0;
  while (k < N) {
    const float sample = x[k];
    const float out = sample + *mem;
    *mem = -coef*sample;
    y[k] = out;
    k++;
  }
}
/* Shared back end of the two public single-frame entry points.
   x holds FRAME_SIZE samples and is pre-emphasised in place before analysis;
   the resulting NB_TOTAL_FEATURES values are copied to features.
   Always returns 0. */
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
  float *frame = x;
  preemphasis(frame, &st->mem_preemph, frame, PREEMPHASIS, FRAME_SIZE);
  compute_frame_features(st, frame, arch);
  OPUS_COPY(features, st->features, NB_TOTAL_FEATURES);
  return 0;
}
/* Compute the features of one frame of 16-bit PCM.
   The FRAME_SIZE samples are converted to float in a local buffer, so pcm is
   left untouched. Always returns 0. */
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
  float frame[FRAME_SIZE];
  int k = 0;
  while (k < FRAME_SIZE) {
    frame[k] = pcm[k];
    k++;
  }
  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
  return 0;
}
/* Compute the features of one frame of float PCM.
   The FRAME_SIZE samples are copied to a local buffer because the analysis
   pre-emphasises its input in place. Always returns 0. */
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
  float frame[FRAME_SIZE];
  memcpy(frame, pcm, FRAME_SIZE*sizeof(frame[0]));
  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
  return 0;
}

View File

@ -0,0 +1,211 @@
/* Copyright (c) 2021 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "lpcnet_private.h"
#include "lpcnet.h"
#include "plc_data.h"
#include "os_support.h"
#include "common.h"
#include "cpu_support.h"
/* Fallback for platforms whose headers do not provide M_PI.
   NOTE(review): M_PI is not referenced anywhere in this file. */
#ifndef M_PI
#define M_PI 3.141592653
#endif
/* Comment this out to have LPCNet update its state on every good packet (slow). */
/* NOTE(review): no code in this file tests PLC_SKIP_UPDATES. */
#define PLC_SKIP_UPDATES
/* Reset the running concealment state.
   Everything from the LPCNET_PLC_RESET_START member to the end of the
   structure is zeroed, the embedded encoder is re-initialised, and the
   positions and flags are put back to their start-up values. Members located
   before LPCNET_PLC_RESET_START other than the encoder are not touched. */
void lpcnet_plc_reset(LPCNetPLCState *st) {
  char *state_begin = (char*)st;
  char *reset_begin = (char*)&st->LPCNET_PLC_RESET_START;
  OPUS_CLEAR(reset_begin, sizeof(LPCNetPLCState)-(reset_begin - state_begin));
  lpcnet_encoder_init(&st->enc);
  OPUS_CLEAR(st->pcm, PLC_BUF_SIZE);
  st->predict_pos = PLC_BUF_SIZE;
  st->analysis_pos = PLC_BUF_SIZE;
  st->analysis_gap = 1;
  st->loss_count = 0;
  st->blend = 0;
}
/* Initialise a concealment state.
   Selects the CPU architecture, initialises the FARGAN and encoder
   sub-states, binds the compiled-in PLC model unless USE_WEIGHTS_FILE is
   defined (in which case the state stays marked as not loaded), and finally
   resets the running state. Returns 0 on success, otherwise the non-zero
   result of init_plcmodel(). */
int lpcnet_plc_init(LPCNetPLCState *st) {
  int status = 0;
  st->arch = opus_select_arch();
  fargan_init(&st->fargan);
  lpcnet_encoder_init(&st->enc);
  st->loaded = 0;
#ifndef USE_WEIGHTS_FILE
  status = init_plcmodel(&st->model, plcmodel_arrays);
  if (status == 0) st->loaded = 1;
#endif
  celt_assert(status == 0);
  lpcnet_plc_reset(st);
  return status;
}
/* Load every model used by the concealment state from one weights blob: the
   PLC network, the encoder's pitch model and FARGAN, in that order.
   data: start of the blob; len: its size in bytes.
   Returns 0 on success, in which case the state is marked as loaded. Returns
   -1 if no weight list could be obtained from the blob, otherwise the
   non-zero result of the first loading step that failed. */
int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Without a usable list, fail here rather than hand a null (or never
     assigned) pointer to init_plcmodel(). */
  if (list == NULL) return -1;
  ret = init_plcmodel(&st->model, list);
  opus_free(list);
  if (ret == 0) {
    ret = lpcnet_encoder_load_model(&st->enc, data, len);
  }
  if (ret == 0) {
    ret = fargan_load_model(&st->fargan, data, len);
  }
  if (ret == 0) st->loaded = 1;
  return ret;
}
/* Queue one vector of NB_FEATURES FEC features.
   features == NULL queues nothing and instead increments fec_skip, the count
   of frames for which no FEC is available.
   When the write position has reached the end of the queue, the unread
   entries are first moved to the front. If that frees no slot (nothing has
   been read yet), the new vector is dropped rather than written past the end
   of st->fec. */
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
  if (features == NULL) {
    st->fec_skip++;
    return;
  }
  if (st->fec_fill_pos == PLC_MAX_FEC) {
    OPUS_MOVE(&st->fec[0][0], &st->fec[st->fec_read_pos][0], (st->fec_fill_pos-st->fec_read_pos)*NB_FEATURES);
    st->fec_fill_pos = st->fec_fill_pos-st->fec_read_pos;
    st->fec_read_pos -= st->fec_read_pos;
  }
  /* Still full after compaction: there is no room for another vector. */
  if (st->fec_fill_pos >= PLC_MAX_FEC) return;
  OPUS_COPY(&st->fec[st->fec_fill_pos][0], features, NB_FEATURES);
  st->fec_fill_pos++;
}
/* Empty the FEC queue and forget any pending skipped frames. */
void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
  st->fec_skip = 0;
  st->fec_fill_pos = 0;
  st->fec_read_pos = 0;
}
/* Run one step of the PLC network: input dense layer (tanh), two GRU layers
   whose states live in st->plc_net, then a linear output dense layer.
   in:  network input vector.
   out: receives the output of the final dense layer.
   A model must have been loaded (asserted). */
static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
  float dense_out[PLC_DENSE_IN_OUT_SIZE];
  PLCModel *m = &st->model;
  float *gru1 = st->plc_net.gru1_state;
  float *gru2 = st->plc_net.gru2_state;
  celt_assert(st->loaded);
  compute_generic_dense(&m->plc_dense_in, dense_out, in, ACTIVATION_TANH, st->arch);
  compute_generic_gru(&m->plc_gru1_input, &m->plc_gru1_recurrent, gru1, dense_out, st->arch);
  compute_generic_gru(&m->plc_gru2_input, &m->plc_gru2_recurrent, gru2, gru1, st->arch);
  compute_generic_dense(&m->plc_dense_out, out, gru2, ACTIVATION_LINEAR, st->arch);
}
/* Produce the next feature vector in out.
   If an unread FEC vector is queued and no skip is pending, it is copied to
   out and the PLC network is stepped on it (its prediction is discarded) so
   the network state stays current; returns 1.
   Otherwise out is the network's prediction from an all-zero input, one
   pending skip (if any) is consumed, and 0 is returned. */
static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
  /* All zeros is exactly the input used for a pure prediction. */
  float net_input[2*NB_BANDS+NB_FEATURES+1] = {0};
  float discard[NB_FEATURES];
  const int fec_available = (st->fec_read_pos != st->fec_fill_pos) && (st->fec_skip == 0);
  if (!fec_available) {
    compute_plc_pred(st, out, net_input);
    if (st->fec_skip > 0) st->fec_skip--;
    return 0;
  }
  OPUS_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
  st->fec_read_pos++;
  /* Update PLC state using FEC, so without Burg features. */
  OPUS_COPY(&net_input[2*NB_BANDS], out, NB_FEATURES);
  net_input[2*NB_BANDS+NB_FEATURES] = -1;
  compute_plc_pred(st, discard, net_input);
  return 1;
}
/* Push one vector of NB_FEATURES values onto the continuation history: the
   oldest of the CONT_VECTORS stored vectors is dropped and the new one
   becomes the last. */
static void queue_features(LPCNetPLCState *st, const float *features) {
  float *history = st->cont_features;
  float *newest = &st->cont_features[(CONT_VECTORS-1)*NB_FEATURES];
  OPUS_MOVE(history, &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES);
  OPUS_COPY(newest, features, NB_FEATURES);
}
/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
   needs to generate two feature vectors to conceal the first lost packet.*/
/* Feed one frame (FRAME_SIZE samples) of PCM into the history buffer.
   The analysis and prediction positions move back by one frame along with
   the shifted history; if the analysis position cannot move back any further
   an analysis gap is flagged instead. The loss counter and the blend flag
   are cleared. Always returns 0. */
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) {
  int k;
  float *newest;
  if (st->analysis_pos >= FRAME_SIZE) st->analysis_pos -= FRAME_SIZE;
  else st->analysis_gap = 1;
  if (st->predict_pos >= FRAME_SIZE) st->predict_pos -= FRAME_SIZE;
  OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
  /* Samples are stored scaled by 1/32768. */
  newest = &st->pcm[PLC_BUF_SIZE-FRAME_SIZE];
  for (k=0;k<FRAME_SIZE;k++) newest[k] = (1.f/32768.f)*pcm[k];
  st->blend = 0;
  st->loss_count = 0;
  return 0;
}
/* Offset added to features[0] as a function of the number of consecutive
   concealed frames (index = loss_count, for loss_count < 10). */
static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6};
/* Generate one frame (FRAME_SIZE samples) of concealment audio into pcm.
   On the first call after lpcnet_plc_update() (blend == 0) the buffered PCM
   that has not been analysed yet is run through the feature extractor and the
   PLC network, two further feature vectors are produced, and FARGAN is primed
   from the history. Every call then obtains one feature vector (FEC if
   available, prediction otherwise), attenuates it according to the loss
   count, synthesizes the frame and appends it to the PCM history.
   A model must have been loaded (asserted). Always returns 0. */
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
int i;
celt_assert(st->loaded);
if (st->blend == 0) {
int count = 0;
/* Rewind the network to the older of the two saved states. */
st->plc_net = st->plc_bak[0];
while (st->analysis_pos + FRAME_SIZE <= PLC_BUF_SIZE) {
float x[FRAME_SIZE];
float plc_features[2*NB_BANDS+NB_FEATURES+1];
celt_assert(st->analysis_pos >= 0);
/* History is stored scaled by 1/32768; undo that for the analysis. */
for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
burg_cepstral_analysis(plc_features, x);
lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
/* The first frame analysed after a gap is skipped, as are frames located
   before predict_pos. */
if ((!st->analysis_gap || count>0) && st->analysis_pos >= st->predict_pos) {
queue_features(st, st->features);
OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
/* plc_bak[] keeps the network states from before the last two steps. */
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
compute_plc_pred(st, st->features, plc_features);
}
st->analysis_pos += FRAME_SIZE;
count++;
}
/* Two more feature vectors are produced and queued before priming FARGAN. */
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
get_fec_or_pred(st, st->features);
queue_features(st, st->features);
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
get_fec_or_pred(st, st->features);
queue_features(st, st->features);
fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features);
st->analysis_gap = 0;
}
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
/* A frame covered by FEC resets the loss counter; a predicted one increments it. */
if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
else st->loss_count++;
/* Attenuate features[0], never below -10: table lookup for the first ten
   losses, then a further 2 per additional lost frame. */
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
fargan_synthesize_int(&st->fargan, pcm, &st->features[0]);
queue_features(st, st->features);
if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE;
else st->analysis_gap = 1;
st->predict_pos = PLC_BUF_SIZE;
/* Append the synthesized frame to the history, scaled by 1/32768. */
OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i];
st->blend = 1;
return 0;
}

View File

@ -0,0 +1,90 @@
#ifndef LPCNET_PRIVATE_H
#define LPCNET_PRIVATE_H
#include <stdio.h>
#include "freq.h"
#include "lpcnet.h"
#include "plc_data.h"
#include "pitchdnn.h"
#include "fargan.h"
/* PITCH_BUF_SIZE is the length of the encoder's exc_buf and lp_buf arrays. */
#define PITCH_FRAME_SIZE 320
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
/* Capacity, in feature vectors, of the FEC queue in LPCNetPLCState. */
#define PLC_MAX_FEC 100
/* NOTE(review): MAX_FEATURE_BUFFER_SIZE is not referenced by the encoder or PLC sources. */
#define MAX_FEATURE_BUFFER_SIZE 4
/* Number of FFT bins used for the pitch IF features; each bin but the first
   yields three values and the first yields one, i.e. 3*30-2 = 88 features. */
#define PITCH_IF_MAX_FREQ 30
#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
/* Number of feature vectors kept in cont_features for fargan_cont(). */
#define CONT_VECTORS 5
/* NOTE(review): FEATURES_DELAY is not referenced by the encoder or PLC sources. */
#define FEATURES_DELAY 1
/* Feature-extraction (encoder) state; zeroed as a whole by lpcnet_encoder_init(). */
struct LPCNetEncState{
/* Pitch estimation network and its state. */
PitchDNNState pitchdnn;
/* Last OVERLAP_SIZE input samples, prepended to the next analysis window. */
float analysis_mem[OVERLAP_SIZE];
/* Pre-emphasis filter memory. */
float mem_preemph;
/* Spectrum bins of the previous frame, used for the IF features. */
kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ];
/* Inputs of the pitch DNN. */
float if_features[PITCH_IF_FEATURES];
float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
/* Latest output of compute_pitchdnn(). */
float dnn_pitch;
/* Last LPC_ORDER aligned input samples: history for the LPC FIR filter. */
float pitch_mem[LPC_ORDER];
/* Previous filtered sample, used when building exc_buf. */
float pitch_filt;
/* PITCH_MAX_PERIOD samples of history followed by the current frame. */
float exc_buf[PITCH_BUF_SIZE];
float lp_buf[PITCH_BUF_SIZE];
/* Memory of the biquad applied to lp_buf; biquad() reads and writes only the first two entries. */
float lp_mem[4];
/* LPC coefficients derived from the current cepstrum. */
float lpc[LPC_ORDER];
/* Feature vector of the latest analysed frame. */
float features[NB_TOTAL_FEATURES];
/* NOTE(review): sig_mem and burg_cepstrum are not referenced in lpcnet_enc.c -- confirm whether still needed. */
float sig_mem[LPC_ORDER];
float burg_cepstrum[2*NB_BANDS];
};
/* Recurrent state of the PLC network: one state vector per GRU layer.
   Copied by value when the PLC code saves and restores snapshots. */
typedef struct {
float gru1_state[PLC_GRU1_STATE_SIZE];
float gru2_state[PLC_GRU2_STATE_SIZE];
} PLCNetState;
/* Length in samples of the PCM history kept by the PLC. */
#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE)
/* Packet-loss-concealment state. Members up to and including arch survive
   lpcnet_plc_reset() (enc is re-initialised by it); everything from
   LPCNET_PLC_RESET_START on is zeroed by a reset. */
struct LPCNetPLCState {
PLCModel model;
FARGANState fargan;
LPCNetEncState enc;
/* Non-zero once a PLC model has been bound successfully. */
int loaded;
/* Architecture selector obtained from opus_select_arch(). */
int arch;
/* First member cleared by lpcnet_plc_reset(); keep it in step with the member order below. */
#define LPCNET_PLC_RESET_START fec
/* FEC queue: entries [fec_read_pos, fec_fill_pos) are pending. */
float fec[PLC_MAX_FEC][NB_FEATURES];
/* Set when the analysis position could not follow the shifting history. */
int analysis_gap;
int fec_read_pos;
int fec_fill_pos;
/* Number of upcoming frames for which no FEC is available. */
int fec_skip;
/* Offsets into pcm[]: next frame to analyse / first frame to feed to the predictor. */
int analysis_pos;
int predict_pos;
/* PCM history, stored scaled by 1/32768. */
float pcm[PLC_BUF_SIZE];
/* 0 after lpcnet_plc_update(), 1 after lpcnet_plc_conceal(). */
int blend;
float features[NB_TOTAL_FEATURES];
/* The CONT_VECTORS most recent feature vectors, oldest first. */
float cont_features[CONT_VECTORS*NB_FEATURES];
/* Consecutive concealed frames not covered by FEC. */
int loss_count;
/* Current network state and the states saved before the last two steps. */
PLCNetState plc_net;
PLCNetState plc_bak[2];
};
/* Feature-extraction helpers defined in lpcnet_enc.c. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N);
void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
/* Synthesis internals operating on LPCNetState.
   NOTE(review): no definitions of these are visible next to this header;
   confirm they are still implemented. */
void lpcnet_reset_signal(LPCNetState *lpcnet);
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
void run_frame_network_flush(LPCNetState *lpcnet);
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
#endif

View File

@ -0,0 +1,307 @@
/* The contents of this file were automatically generated by dump_lpcnet_tables.c */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "kiss_fft.h"
static const arch_fft_state arch_fft = {0, NULL};
static const opus_int16 fft_bitrev[320] = {
0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
63, 127, 191, 255, 319, };
static const kiss_twiddle_cpx fft_twiddles[320] = {
{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
};
/* Precomputed configuration for a 320-point FFT, built from the tables in this file.
   - scale is 1/320 (0.003125).
   - factors lists the decomposition 320 = 5 * 4 * 4 * 4; the entries read as
     (radix, remaining length) pairs 5/64, 4/16, 4/4, 4/1 followed by zero padding.
     NOTE(review): pair interpretation assumed from the values -- confirm in kiss_fft.
   - fft_bitrev and fft_twiddles are the 320-entry tables defined in this file.
   - arch_fft is defined const in this file; the cast below removes that qualifier. */
const kiss_fft_state kfft = {
320, /* nfft */
0.0031250000f, /* scale */
-1, /* shift */
{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev, /* bitrev*/
fft_twiddles, /* twiddles*/
(arch_fft_state *)&arch_fft, /* arch_fft*/
};
const float half_window[] = {
3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
};
const float dct_table[] = {
0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };

View File

@ -0,0 +1,64 @@
# Build description for the DNN code. Produces `dnn_lib`, which is either the
# 'opus-dnn' static library or an empty list when deep PLC is disabled.

# Base source list: deep PLC, plus DRED and OSCE sources when those options are on.
dnn_sources = sources['DEEP_PLC_SOURCES']
dred_sources = sources['DRED_SOURCES']
if opt_enable_dred
dnn_sources += dred_sources
endif
osce_sources = sources['OSCE_SOURCES']
if opt_enable_osce
dnn_sources += osce_sources
endif
# Per-instruction-set source lists. The variable names must stay in the form
# 'dnn_sources_<intr_name>' because the foreach below looks them up by name.
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
dnn_sources_neon_intr = sources['DNN_SOURCES_NEON']
dnn_sources_dotprod_intr = sources['DNN_SOURCES_DOTPROD']
dnn_includes = [opus_includes]
dnn_static_libs = []
# Run-time CPU detection sources are added only when OPUS_HAVE_RTCD is set in
# the configuration for the matching CPU family.
if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
dnn_sources += sources['DNN_SOURCES_X86_RTCD']
endif
if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm
if opus_conf.has('OPUS_HAVE_RTCD')
dnn_sources += sources['DNN_SOURCES_ARM_RTCD']
endif
endif
# One helper static library per available instruction set, so that each set of
# sources is compiled with its own 'opus_<intr_name>_args' flags (an empty list
# is used when that variable is not defined). Sets whose 'have_<intr_name>'
# variable is false are skipped.
foreach intr_name : ['sse2', 'sse4_1', 'avx2', 'neon_intr', 'dotprod_intr']
have_intr = get_variable('have_' + intr_name)
if not have_intr
continue
endif
intr_sources = get_variable('dnn_sources_' + intr_name)
intr_args = get_variable('opus_@0@_args'.format(intr_name), [])
dnn_static_libs += static_library('dnn_' + intr_name, intr_sources,
c_args: intr_args,
include_directories: dnn_includes,
install: false)
endforeach
dnn_c_args = []
if host_machine.system() == 'windows'
dnn_c_args += ['-DDLL_EXPORT']
endif
# The helper libraries are folded into 'opus-dnn' with link_whole. Note that
# they are declared above regardless of opt_enable_deep_plc.
if opt_enable_deep_plc
dnn_lib = static_library('opus-dnn',
dnn_sources,
c_args: dnn_c_args,
include_directories: dnn_includes,
link_whole: [dnn_static_libs],
dependencies: libm,
install: false)
else
dnn_lib = []
endif

416
src/libs/opus/dnn/nndsp.c Normal file
View File

@ -0,0 +1,416 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "nndsp.h"
#include "arch.h"
#include "nnet.h"
#include "os_support.h"
#include "pitch.h"
#include <math.h>
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
/* Initialises an AdaConv state object before first use.
   Applies OPUS_CLEAR to exactly one AdaConvState element; the macro is defined
   outside this file (presumably it zero-fills the whole struct -- confirm in
   os_support.h). hAdaConv must point to a valid object. */
void init_adaconv_state(AdaConvState *hAdaConv)
{
OPUS_CLEAR(hAdaConv, 1);
}
/* Initialises an AdaComb state object before first use.
   Applies OPUS_CLEAR to exactly one AdaCombState element; the macro is defined
   outside this file (presumably it zero-fills the whole struct -- confirm in
   os_support.h). hAdaComb must point to a valid object. */
void init_adacomb_state(AdaCombState *hAdaComb)
{
OPUS_CLEAR(hAdaComb, 1);
}
/* Initialises an AdaShape state object before first use.
   Applies OPUS_CLEAR to exactly one AdaShapeState element; the macro is defined
   outside this file (presumably it zero-fills the whole struct -- confirm in
   os_support.h). hAdaShape must point to a valid object. */
void init_adashape_state(AdaShapeState *hAdaShape)
{
OPUS_CLEAR(hAdaShape, 1);
}
void compute_overlap_window(float *window, int overlap_size)
{
int i_sample;
for (i_sample=0; i_sample < overlap_size; i_sample++)
{
window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
}
}
#ifdef DEBUG_NNDSP
/* Debug helper: prints every element of vec on its own line in the form
   "<name>[<index>]: <value>". Prints nothing when length <= 0. */
void print_float_vector(const char* name, const float *vec, int length)
{
    int idx;

    for (idx = 0; idx < length; idx++)
    {
        printf("%s[%d]: %f\n", name, idx, vec[idx]);
    }
}
#endif
static void scale_kernel(
float *kernel,
int in_channels,
int out_channels,
int kernel_size,
float *gain
)
/* normalizes (p-norm) kernel over input channel and kernel dimension */
{
float norm;
int i_in_channels, i_out_channels, i_kernel;
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
{
norm = 0;
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++)
{
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
{
norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
}
}
#ifdef DEBUG_NNDSP
printf("kernel norm: %f, %f\n", norm, sqrt(norm));
#endif
norm = 1.f / (1e-6f + sqrt(norm));
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
{
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
{
kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels];
}
}
}
}
static void transform_gains(
float *gains,
int num_gains,
float filter_gain_a,
float filter_gain_b
)
{
int i;
for (i = 0; i < num_gains; i++)
{
gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
}
}
/* Runs one frame of adaptive convolution.

   A new kernel and a per-output-channel gain are predicted from `features`
   with two dense layers, the kernel is normalised and scaled by the gains, and
   the input is filtered with it. Over the first overlap_size samples the output
   is a cross-fade between the previous frame's kernel (weight window[i]) and
   the new kernel (weight 1 - window[i]).

   hAdaConv      state: per-channel input history (kernel_size samples each) and
                 the kernel used for the previous frame; both updated on return
   x_out         output, out_channels * frame_size samples, channel after channel
   x_in          input, in_channels * frame_size samples, channel after channel
   features      conditioning vector fed to both dense layers
   kernel_layer  dense layer producing the raw kernel (linear activation)
   gain_layer    dense layer producing the raw gains (tanh activation)
   feature_dim   only used for debug printing
   left_padding  must equal kernel_size - 1 (asserted)
   filter_gain_a, filter_gain_b
                 gains are mapped through exp(a * g + b)
   shape_gain    must be 1 (asserted)
   window        overlap_size cross-fade weights applied to the old kernel
   arch          passed through to the dense-layer and xcorr routines

   kernel_size must be smaller than frame_size (asserted). The sizes are not
   checked against the ADACONV_MAX_* limits of the local buffers. */
void adaconv_process_frame(
AdaConvState* hAdaConv,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
int feature_dim,
int frame_size,
int overlap_size,
int in_channels,
int out_channels,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float shape_gain,
float *window,
int arch
)
{
float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
float kernel0[ADACONV_MAX_KERNEL_SIZE];
float kernel1[ADACONV_MAX_KERNEL_SIZE];
float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
float *p_input;
int i_in_channels, i_out_channels, i_sample;
(void) feature_dim; /* ToDo: figure out whether we might need this information */
celt_assert(shape_gain == 1);
celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
celt_assert(kernel_size < frame_size);
OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
#ifdef DEBUG_NNDSP
print_float_vector("x_in", x_in, in_channels * frame_size);
#endif
/* prepare input */
/* Each channel occupies (kernel_size + frame_size) floats in input_buffer:
   kernel_size history samples followed by the frame_size new samples. */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
}
/* p_input points at the first new sample of channel 0 */
p_input = input_buffer + kernel_size;
/* calculate new kernel and new gain */
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
print_float_vector("features", features, feature_dim);
print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
#endif
/* gains -> exp(a * g + b), then normalise every output channel of the kernel and apply its gain */
transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
#ifdef DEBUG_NNDSP
print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain", gain_buffer, out_channels);
#endif
/* calculate overlapping part using kernel from last frame */
/* The loop below produces the whole frame, not only the overlap: every
   (output, input) channel pair contributes to output_buffer, which was
   cleared above and is accumulated with +=. */
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
{
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
{
/* kernel0 = previous kernel, kernel1 = new kernel, both zero-padded to
   ADACONV_MAX_KERNEL_SIZE because the correlations below always run with that length */
OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
/* old kernel is only needed for the first overlap_size output samples, new kernel for all frame_size samples */
celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
/* cross-fade: window weights the old-kernel output, (1 - window) the new-kernel output */
for (i_sample = 0; i_sample < overlap_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
}
/* remainder of the frame uses the new kernel only */
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
}
}
}
OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
#ifdef DEBUG_NNDSP
print_float_vector("x_out", x_out, out_channels * frame_size);
#endif
/* buffer update */
/* keep the last kernel_size input samples of every channel and the kernel just used for the next call */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
}
OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
}
/* Runs one frame of the adaptive comb filter.

   A kernel, a kernel gain and a global gain are predicted from `features`.
   The kernel is normalised and scaled, then correlated with the input delayed
   by pitch_lag; the result is added to the undelayed input and multiplied by
   the global gain. Over the first overlap_size samples the output cross-fades
   from the previous frame's kernel / lag / global gain (weight window[i]) to
   the new ones (weight 1 - window[i]):

     i <  overlap_size: out = w*gg_last*y_last + (1-w)*gg*y_new + (w*gg_last + (1-w)*gg)*x
     i >= overlap_size: out = gg * (y_new + x)

   hAdaComb           state: input history (kernel_size + ADACOMB_MAX_LAG samples),
                      previous kernel, previous pitch lag and previous global gain;
                      all updated on return
   x_out              output, frame_size samples
   x_in               input, frame_size samples
   features           conditioning vector fed to all three dense layers
   kernel_layer       dense layer producing the raw kernel (linear activation)
   gain_layer         dense layer producing one raw gain (ReLU activation); the
                      kernel gain is exp(log_gain_limit - raw), so it never
                      exceeds exp(log_gain_limit)
   global_gain_layer  dense layer producing one raw global gain (tanh
                      activation), mapped through exp(filter_gain_a * g + filter_gain_b)
   pitch_lag          delay, in samples, applied to the comb branch for this frame
   feature_dim        only used for debug printing
   left_padding       additional offset subtracted from the comb-branch read position
   window             overlap_size cross-fade weights applied to the old settings
   arch               passed through to the dense-layer and xcorr routines

   Sizes are not checked against the ADACOMB_MAX_* limits of the local buffers. */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,
    int arch
)
{
    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
    float gain, global_gain;
    float *p_input;
    int i_sample;
    /* Sized by the same constant that is used to clear them and as the
       correlation length below, so the three can never disagree. */
    float kernel[ADACOMB_MAX_KERNEL_SIZE];
    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);

    /* input_buffer = [ history (kernel_size + ADACOMB_MAX_LAG) | new frame (frame_size) ];
       p_input points at the first new sample. */
    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;

    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain_raw", &gain, 1);
    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
#endif
    /* the ReLU output is >= 0, so the kernel gain is bounded by exp(log_gain_limit) */
    gain = exp(log_gain_limit - gain);
    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
#ifdef DEBUG_NNDSP
    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain", &gain, 1);
#endif

    /* zero-pad both kernels to the fixed correlation length */
    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_COPY(kernel, kernel_buffer, kernel_size);
    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);

    /* old kernel / old lag for the overlap only, new kernel / new lag for the whole frame */
    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);

    /* cross-fade of the comb branch, each side carrying its own global gain */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
        output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
    }
    /* direct branch in the overlap, with the global gain cross-faded the same way */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
        output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
    }
    /* remainder of the frame: new settings only */
    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
    {
        output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
    }
    OPUS_COPY(x_out, output_buffer, frame_size);
#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, frame_size);
#endif

    /* buffer update: remember kernel, trailing input samples, lag and global gain for the next call */
    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
    hAdaComb->last_pitch_lag = pitch_lag;
    hAdaComb->last_global_gain = global_gain;
}
/* Adaptive temporal shaping of one frame.
 *
 * Builds a log-domain temporal envelope of x_in (mean absolute value over
 * blocks of avg_pool_k samples), removes its mean and appends the mean as an
 * extra coefficient. The feature vector and the envelope are fed through the
 * alpha1f / alpha1t conv layers, summed, passed through a leaky ReLU
 * (slope 0.2), then through alpha2. The result is exponentiated and applied
 * as a per-sample gain: x_out[i] = exp(alpha2_out[i]) * x_in[i].
 *
 * hAdaShape   conv memories for the three layers (updated in place)
 * x_out       output, frame_size samples
 * x_in        input, frame_size samples
 * features    conditioning vector, feature_dim values
 * alpha1f     conv layer applied to the features
 * alpha1t     conv layer applied to the envelope (frame_size/avg_pool_k + 1 values)
 * alpha2      conv layer producing the per-sample log gains
 * frame_size  must be a positive multiple of avg_pool_k and at most
 *             ADASHAPE_MAX_FRAME_SIZE
 * avg_pool_k  envelope pooling length, must be > 0
 * arch        run-time CPU selection, forwarded to the layer kernels
 */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1f,
    const LinearLayer *alpha1t,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,
    int arch
)
{
    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
    float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
    int i, k;
    int tenv_size;
    float mean;
    float *tenv;

    /* Validate the divisor before it is used in any expression below. */
    celt_assert(avg_pool_k > 0);
    celt_assert(feature_dim >= 0);
    /* out_buffer/tmp_buffer hold frame_size values; frame_size == 0 would
       make the envelope mean 0/0. */
    celt_assert(frame_size > 0 && frame_size <= ADASHAPE_MAX_FRAME_SIZE);
    celt_assert(frame_size % avg_pool_k == 0);
    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);

    tenv_size = frame_size / avg_pool_k;
    /* in_buffer layout: [ features | envelope (tenv_size) | envelope mean ] */
    tenv = in_buffer + feature_dim;
    OPUS_CLEAR(tenv, tenv_size + 1);

    OPUS_COPY(in_buffer, features, feature_dim);

    /* calculate temporal envelope */
    mean = 0;
    for (i = 0; i < tenv_size; i++)
    {
        for (k = 0; k < avg_pool_k; k++)
        {
            tenv[i] += fabs(x_in[i * avg_pool_k + k]);
        }
        /* 1.52587890625e-05 == 2^-16 keeps the log finite for silent blocks */
        tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
        mean += tenv[i];
    }
    mean /= tenv_size;
    for (i = 0; i < tenv_size; i++)
    {
        tenv[i] -= mean;
    }
    tenv[tenv_size] = mean;
#ifdef DEBUG_NNDSP
    print_float_vector("tenv", tenv, tenv_size + 1);
#endif

    /* calculate temporal weights */
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
#endif
    compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
    compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_out", out_buffer, frame_size);
#endif

    /* compute leaky ReLU by hand. ToDo: try tanh activation.
       in_buffer is reused here as the input of alpha2. */
    for (i = 0; i < frame_size; i ++)
    {
        float tmp = out_buffer[i] + tmp_buffer[i];
        in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
    }
#ifdef DEBUG_NNDSP
    print_float_vector("post_alpha1", in_buffer, frame_size);
#endif
    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);

    /* shape signal */
    for (i = 0; i < frame_size; i ++)
    {
        x_out[i] = exp(out_buffer[i]) * x_in[i];
    }
}

143
src/libs/opus/dnn/nndsp.h Normal file
View File

@ -0,0 +1,143 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNDSP_H
#define NNDSP_H
#include "opus_types.h"
#include "nnet.h"
#include <string.h>
#define ADACONV_MAX_KERNEL_SIZE 16
#define ADACONV_MAX_INPUT_CHANNELS 2
#define ADACONV_MAX_OUTPUT_CHANNELS 2
#define ADACONV_MAX_FRAME_SIZE 80
#define ADACONV_MAX_OVERLAP_SIZE 40
#define ADACOMB_MAX_LAG 300
#define ADACOMB_MAX_KERNEL_SIZE 16
#define ADACOMB_MAX_FRAME_SIZE 80
#define ADACOMB_MAX_OVERLAP_SIZE 40
#define ADASHAPE_MAX_INPUT_DIM 512
#define ADASHAPE_MAX_FRAME_SIZE 160
/*#define DEBUG_NNDSP*/
#ifdef DEBUG_NNDSP
#include <stdio.h>
#endif
void print_float_vector(const char* name, const float *vec, int length);
/* Persistent state for adaconv_process_frame().
   NOTE(review): the function body is not part of this header; the field
   descriptions below are inferred from the names and array sizes and should
   be confirmed against nndsp.c. */
typedef struct {
/* presumably the trailing input samples of the previous frame, per input channel */
float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
/* presumably the kernel(s) used for the previous frame (one per in/out channel pair) */
float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
/* presumably the gain used for the previous frame */
float last_gain;
} AdaConvState;
/* Persistent state for adacomb_process_frame(). All fields are read at the
   start of a frame and overwritten at its end. */
typedef struct {
/* last (kernel_size + ADACOMB_MAX_LAG) input samples, prepended to the next frame */
float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
/* scaled kernel of the previous frame, used for the cross-fade over the overlap region */
float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
/* global gain of the previous frame */
float last_global_gain;
/* pitch lag of the previous frame */
int last_pitch_lag;
} AdaCombState;
/* Persistent state for adashape_process_frame(): one memory buffer per conv
   layer, each passed as the `mem` argument of compute_generic_conv1d(). */
typedef struct {
/* memory of the alpha1f layer (feature branch) */
float conv_alpha1f_state[ADASHAPE_MAX_INPUT_DIM];
/* memory of the alpha1t layer (temporal-envelope branch) */
float conv_alpha1t_state[ADASHAPE_MAX_INPUT_DIM];
/* memory of the alpha2 layer */
float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
} AdaShapeState;
/* State initializers, one per module.
   NOTE(review): definitions are not visible from this header; presumably each
   resets the given state struct before the first frame — confirm in nndsp.c. */
void init_adaconv_state(AdaConvState *hAdaConv);
void init_adacomb_state(AdaCombState *hAdaComb);
void init_adashape_state(AdaShapeState *hAdaShape);
/* Fills `window` with overlap_size cross-fade weights.
   NOTE(review): the window shape is defined in nndsp.c, not here — confirm. */
void compute_overlap_window(float *window, int overlap_size);
/* Adaptive convolution of one frame.
   NOTE(review): the definition is not visible from this header. Judging by
   the parameter list it mirrors adacomb_process_frame() without the pitch
   lag and with multiple input/output channels; confirm the details
   (buffer layouts, meaning of shape_gain) in nndsp.c. */
void adaconv_process_frame(
AdaConvState* hAdaConv,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
int feature_dim, /* not strictly necessary */
int frame_size,
int overlap_size,
int in_channels,
int out_channels,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float shape_gain,
float *window,
int arch
);
/* Adaptive comb filtering of one frame.
   A kernel (kernel_layer, linear), a kernel gain (gain_layer, ReLU, mapped to
   exp(log_gain_limit - g)) and a global gain (global_gain_layer, tanh, mapped
   to exp(filter_gain_a * g + filter_gain_b)) are predicted from `features`.
   The scaled kernel is correlated with the input delayed by pitch_lag
   (offset by left_padding), added to the input and multiplied by the global
   gain. Over the first overlap_size samples the result is cross-faded with
   the previous frame's kernel, lag and global gain using `window`
   (window[i] weights the previous frame, 1 - window[i] the current one).
   x_in/x_out hold frame_size samples; feature_dim is currently unused by the
   implementation. hAdaComb is updated with this frame's kernel, lag, global
   gain and input history. */
void adacomb_process_frame(
AdaCombState* hAdaComb,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
const LinearLayer *global_gain_layer,
int pitch_lag,
int feature_dim,
int frame_size,
int overlap_size,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float log_gain_limit,
float *window,
int arch
);
/* Adaptive temporal shaping of one frame.
   Computes a mean-removed log envelope of x_in (mean absolute value over
   blocks of avg_pool_k samples, with the mean appended), runs `features`
   through alpha1f and the envelope through alpha1t, sums both, applies a
   leaky ReLU and alpha2, and writes x_out[i] = exp(alpha2_out[i]) * x_in[i].
   frame_size must be a multiple of avg_pool_k, and
   feature_dim + frame_size / avg_pool_k + 1 must be below
   ADASHAPE_MAX_INPUT_DIM. hAdaShape holds the conv memories of the three
   layers and is updated in place. */
void adashape_process_frame(
AdaShapeState *hAdaShape,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *alpha1f,
const LinearLayer *alpha1t,
const LinearLayer *alpha2,
int feature_dim,
int frame_size,
int avg_pool_k,
int arch
);
#endif

149
src/libs/opus/dnn/nnet.c Normal file
View File

@ -0,0 +1,149 @@
/* Copyright (c) 2018 Mozilla
2008-2011 Octasic Inc.
2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <math.h>
#include "opus_types.h"
#include "arch.h"
#include "nnet.h"
#include "dred_rdovae_constants.h"
#include "plc_data.h"
#include "fargan.h"
#include "os_support.h"
#include "vec.h"
#ifdef ENABLE_OSCE
#include "osce.h"
#endif
#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
#else
#warning Compiling without any vectorization. This code will be very slow
#endif
#endif
#define SOFTMAX_HACK
/* Fully-connected layer: affine transform of `input` followed by the
   requested activation, applied in place on `output`. */
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
   const int n_out = layer->nb_outputs;

   compute_linear(layer, output, input, arch);
   compute_activation(output, output, n_out, activation, arch);
}
#ifdef ENABLE_OSCE
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
#endif
/* One GRU step. `state` (N = recurrent_weights->nb_inputs values) is updated
   in place from the input vector `in`, which must not alias `state`.
   Both weight sets produce 3*N outputs laid out as [ z | r | h ]. */
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
   float gates[3*MAX_RNN_NEURONS_ALL];
   float rec[3*MAX_RNN_NEURONS_ALL];
   float *update_gate;
   float *reset_gate;
   float *cand;
   int n_units;
   int k;

   celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
   celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
   celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
   celt_assert(in != state);

   n_units = recurrent_weights->nb_inputs;
   update_gate = gates;
   reset_gate = gates + n_units;
   cand = gates + 2*n_units;

   /* Input and recurrent contributions for all three sections. */
   compute_linear(input_weights, gates, in, arch);
   compute_linear(recurrent_weights, rec, state, arch);

   /* z and r: sigmoid of the summed contributions. */
   for (k = 0; k < 2*n_units; k++) {
      gates[k] += rec[k];
   }
   compute_activation(gates, gates, 2*n_units, ACTIVATION_SIGMOID, arch);

   /* Candidate: recurrent part is gated by r before the tanh. */
   for (k = 0; k < n_units; k++) {
      cand[k] += rec[2*n_units + k]*reset_gate[k];
   }
   compute_activation(cand, cand, n_units, ACTIVATION_TANH, arch);

   /* Blend the old state and the candidate with z, then commit. */
   for (k = 0; k < n_units; k++) {
      cand[k] = update_gate[k]*state[k] + (1-update_gate[k])*cand[k];
   }
   OPUS_COPY(state, cand, n_units);
}
/* Gated linear unit: output[i] = input[i] * sigmoid(W*input + b)[i].
   The layer must be square (nb_inputs == nb_outputs) and no larger than
   MAX_INPUTS, the size of the local gate buffer. `output` may be the same
   buffer as `input`. */
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
{
   int i;
   float act2[MAX_INPUTS];
   celt_assert(layer->nb_inputs == layer->nb_outputs);
   /* act2 lives on the stack: reject layers that would overrun it. */
   celt_assert(layer->nb_outputs <= MAX_INPUTS);
   compute_linear(layer, act2, input, arch);
   compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
   if (input == output) {
      /* Give a vectorization hint to the compiler for the in-place case. */
      for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
   } else {
      for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
   }
}
#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
/* Causal 1-D convolution of one frame, implemented as a dense layer over
   [ history | input ]. `mem` holds the previous
   (layer->nb_inputs - input_size) input values and is refreshed with the
   newest ones before returning. When the layer has no history
   (nb_inputs == input_size), `mem` is not touched. */
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
{
   float window[MAX_CONV_INPUTS_ALL];
   int history_size;

   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);

   history_size = layer->nb_inputs - input_size;

   /* Assemble the layer input: past samples first, current frame last. */
   if (history_size != 0) {
      OPUS_COPY(window, mem, history_size);
   }
   OPUS_COPY(&window[history_size], input, input_size);

   compute_linear(layer, output, window, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);

   /* Slide the history forward by one frame. */
   if (history_size != 0) {
      OPUS_COPY(mem, &window[input_size], history_size);
   }
}
/* Causal 1-D convolution of one frame with optional dilation.
   The kernel spans ksize = layer->nb_inputs / input_size taps.
   - dilation == 1: identical to compute_generic_conv1d(); `mem` holds the
     last (nb_inputs - input_size) input values.
   - otherwise: `mem` holds the last input_size*dilation*(ksize-1) input
     values; every dilation-th frame is gathered from it, followed by the
     current frame.
   `mem` is updated with the current input before returning. */
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   int ksize;
   int i;
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   /* input_size is used as a divisor below. */
   celt_assert(input_size > 0);
   ksize = layer->nb_inputs/input_size;
   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
   else {
      /* Shift the history left by one frame. Source and destination overlap
         whenever dilation*(ksize-1) > 2, so this must be a memmove-style
         copy (OPUS_MOVE), not a memcpy-style one (OPUS_COPY). */
      OPUS_MOVE(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
      OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
   }
}

163
src/libs/opus/dnn/nnet.h Normal file
View File

@ -0,0 +1,163 @@
/* Copyright (c) 2018 Mozilla
Copyright (c) 2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNET_H_
#define NNET_H_
#include <stddef.h>
#include "opus_types.h"
#define ACTIVATION_LINEAR 0
#define ACTIVATION_SIGMOID 1
#define ACTIVATION_TANH 2
#define ACTIVATION_RELU 3
#define ACTIVATION_SOFTMAX 4
#define ACTIVATION_SWISH 5
#define WEIGHT_BLOB_VERSION 0
#define WEIGHT_BLOCK_SIZE 64
/* One named weight array, as listed in the *_arrays[] tables declared below
   and consumed by linear_init()/conv2d_init(). */
typedef struct {
/* array name used for lookup */
const char *name;
/* presumably one of the WEIGHT_TYPE_* constants — confirm against parse_weights() */
int type;
/* size of `data`; NOTE(review): unit (bytes vs. elements) is not visible here — confirm */
int size;
/* pointer to the array contents */
const void *data;
} WeightArray;
#define WEIGHT_TYPE_float 0
#define WEIGHT_TYPE_int 1
#define WEIGHT_TYPE_qweight 2
#define WEIGHT_TYPE_int8 3
/* Fixed-size record header.
   NOTE(review): presumably the header that precedes each array in a binary
   weight blob read by parse_weights() (see WEIGHT_BLOB_VERSION and
   WEIGHT_BLOCK_SIZE) — confirm against the parser; field meanings below are
   inferred from the names. */
typedef struct {
/* presumably a 4-byte magic tag */
char head[4];
/* presumably compared with WEIGHT_BLOB_VERSION */
int version;
/* presumably one of the WEIGHT_TYPE_* constants */
int type;
/* presumably the payload size */
int size;
/* presumably the padded payload size */
int block_size;
/* array name */
char name[44];
} WeightHead;
/* Generic (optionally sparse, optionally 8-bit quantized) affine
   transformation out = W*in + bias, evaluated by compute_linear().
   Unused members are NULL. */
typedef struct {
/* bias added to the output (NULL for none) */
const float *bias;
/* alternative bias used instead of `bias` for int8 weights when USE_SU_BIAS is defined */
const float *subias;
/* int8 weights, used together with `scale` when float_weights is NULL */
const opus_int8 *weights;
/* float weights; take precedence over `weights` when non-NULL */
const float *float_weights;
/* when non-NULL, selects the sparse kernels and provides their index data */
const int *weights_idx;
/* optional diagonal terms (3*nb_inputs values); only used for GRU recurrent weights */
const float *diag;
/* scale factors passed to the int8 kernels */
const float *scale;
/* input vector length */
int nb_inputs;
/* output vector length */
int nb_outputs;
} LinearLayer;
/* 2-D convolution layer (time x height), evaluated one time frame at a time
   by compute_conv2d(). */
typedef struct {
/* one bias value per output channel (NULL for none) */
const float *bias;
/* weights indexed as [out_channels][in_channels][ktime][kheight] */
const float *float_weights;
/* number of input channels */
int in_channels;
/* number of output channels */
int out_channels;
/* kernel size along the time axis */
int ktime;
/* kernel size along the height axis */
int kheight;
} Conv2dLayer;
/* Dense layer: affine transform followed by `activation` (one of ACTIVATION_*). */
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
/* One GRU step; `state` is updated in place and must not alias `in`. */
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
/* Causal 1-D convolution of one frame; `mem` carries the input history between calls. */
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
/* Same as compute_generic_conv1d() with a dilated kernel. */
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
/* Gated linear unit: output = input * sigmoid(layer(input)); requires nb_inputs == nb_outputs. */
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
/* NOTE(review): no definition is visible alongside the functions above — confirm where this is implemented. */
void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
int parse_weights(WeightArray **list, const void *data, int len);
extern const WeightArray lpcnet_arrays[];
extern const WeightArray plcmodel_arrays[];
extern const WeightArray rdovaeenc_arrays[];
extern const WeightArray rdovaedec_arrays[];
extern const WeightArray fwgan_arrays[];
extern const WeightArray fargan_arrays[];
extern const WeightArray pitchdnn_arrays[];
extern const WeightArray lossgen_arrays[];
int linear_init(LinearLayer *layer, const WeightArray *arrays,
const char *bias,
const char *subias,
const char *weights,
const char *float_weights,
const char *weights_idx,
const char *diag,
const char *scale,
int nb_inputs,
int nb_outputs);
int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
const char *bias,
const char *float_weights,
int in_channels,
int out_channels,
int ktime,
int kheight);
void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
void compute_activation_c(float *output, const float *input, int N, int activation);
void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/dnn_arm.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2)
#include "x86/dnn_x86.h"
#endif
#ifndef OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
#endif
#ifndef OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
#endif
#ifndef OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
#endif
#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
#if defined(_MSC_VER)
#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
#else
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
#endif
#endif
#endif /* NNET_H_ */

View File

@ -0,0 +1,247 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNET_ARCH_H
#define NNET_ARCH_H
#include "nnet.h"
#include "arch.h"
#include "os_support.h"
#include "vec.h"
#define CAT_SUFFIX2(a,b) a ## b
#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
/* Force vectorization on for DNN code because some of the loops rely on
compiler vectorization rather than explicitly using intrinsics. */
#if OPUS_GNUC_PREREQ(5,1)
#define GCC_POP_OPTIONS
#pragma GCC push_options
#pragma GCC optimize("tree-vectorize")
#endif
#define MAX_ACTIVATIONS (4096)
/* Swish activation: y[k] = x[k] * sigmoid(x[k]) for k in [0, N).
   N must not exceed MAX_ACTIVATIONS (size of the local scratch buffer). */
static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
{
   float sig[MAX_ACTIVATIONS];
   int k;

   celt_assert(N <= MAX_ACTIVATIONS);
   vec_sigmoid(sig, x, N);
   for (k = 0; k < N; k++) {
      y[k] = x[k]*sig[k];
   }
}
/* Rectified linear unit: negative inputs map to 0, everything else passes through. */
static OPUS_INLINE float relu(float x)
{
   if (x < 0) {
      return 0;
   }
   return x;
}
/*#define HIGH_ACCURACY */
/* Applies `activation` (one of ACTIVATION_*) to N values of `input` and
   writes the result to `output`. `output` may be the same buffer as `input`.
   With SOFTMAX_HACK defined, ACTIVATION_SOFTMAX is a plain copy. Any value
   other than the known activations is asserted to be ACTIVATION_LINEAR and
   treated as a copy. */
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
{
   int i;
   if (activation == ACTIVATION_SIGMOID) {
#ifdef HIGH_ACCURACY
      for (i=0;i<N;i++)
      {
         output[i] = 1.f / (1 + exp(-input[i]));
      }
#else
      vec_sigmoid(output, input, N);
#endif
   } else if (activation == ACTIVATION_TANH) {
#ifdef HIGH_ACCURACY
      for (i=0;i<N;i++)
      {
         output[i] = tanh(input[i]);
      }
#else
      vec_tanh(output, input, N);
#endif
   } else if (activation == ACTIVATION_SWISH) {
      vec_swish(output, input, N);
   } else if (activation == ACTIVATION_RELU) {
      for (i=0;i<N;i++)
         output[i] = relu(input[i]);
   } else if (activation == ACTIVATION_SOFTMAX) {
#ifdef SOFTMAX_HACK
      /* Callers routinely run activations in place; copying a buffer onto
         itself with a memcpy-style copy is undefined, so skip it. */
      if (input != output) {
         OPUS_COPY(output, input, N);
      }
#else
      float sum = 0;
      softmax(output, input, N);
      for (i=0;i<N;i++) {
         sum += output[i];
      }
      /* 1e-30 guards against division by zero. */
      sum = 1.f/(sum+1e-30);
      for (i=0;i<N;i++)
         output[i] = sum*output[i];
#endif
   } else {
      celt_assert(activation == ACTIVATION_LINEAR);
      if (input != output) {
         for (i=0;i<N;i++)
            output[i] = input[i];
      }
   }
}
void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
{
int i, M, N;
const float *bias;
celt_assert(in != out);
bias = linear->bias;
M = linear->nb_inputs;
N = linear->nb_outputs;
if (linear->float_weights != NULL) {
if (linear->weights_idx != NULL) sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, N, in);
else sgemv(out, linear->float_weights, N, M, N, in);
} else if (linear->weights != NULL) {
if (linear->weights_idx != NULL) sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, N, M, in);
else cgemv8x4(out, linear->weights, linear->scale, N, M, in);
/* Only use SU biases on for integer matrices on SU archs. */
#ifdef USE_SU_BIAS
bias = linear->subias;
#endif
}
else OPUS_CLEAR(out, N);
if (bias != NULL) {
for (i=0;i<N;i++) out[i] += bias[i];
}
if (linear->diag) {
/* Diag is only used for GRU recurrent weights. */
celt_assert(3*M == N);
for (i=0;i<M;i++) {
out[i] += linear->diag[i]*in[i];
out[i+M] += linear->diag[i+M]*in[i];
out[i+2*M] += linear->diag[i+2*M]*in[i];
}
}
}
/* Non-padded 2-D convolution of a single output frame.
   input : [ ktime x in_channels x (height+kheight-1) ]
   kernel: [ out_channels x in_channels x ktime x kheight ]
   output: [ out_channels x height ], rows spaced hstride floats apart.
   The output extent along the time axis is 1, i.e. one frame per call. */
static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
{
   const int row_len = height+kheight-1;
   int oc;

   for (oc = 0; oc < out_channels; oc++) {
      float *dst = &out[oc*hstride];
      int ic;

      OPUS_CLEAR(dst, height);
      for (ic = 0; ic < in_channels; ic++) {
         int t;
         for (t = 0; t < ktime; t++) {
            const float *src = &in[t*in_channels*row_len + ic*row_len];
            const float *taps = &weights[oc*in_channels*ktime*kheight + ic*ktime*kheight + t*kheight];
            int h;
            for (h = 0; h < kheight; h++) {
               const float w = taps[h];
               int j;
               for (j = 0; j < height; j++) {
                  dst[j] += w * src[j + h];
               }
            }
         }
      }
   }
}
/* Specialization of conv2d_float() for a 3x3 kernel (ktime == kheight == 3),
   with the two kernel loops unrolled into a single expression per output
   sample. Same input/kernel/output layouts as conv2d_float().
   There are no intrinsics in this function (or the one above) because the gcc
   (and hopefully other compilers') auto-vectorizer is smart enough to produce
   the right code by itself based on the compile flags. */
static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
{
int i;
int in_stride;
int kheight, ktime;
kheight = ktime = 3;
/* length of one input row: output height plus the kernel overhang */
in_stride = height+kheight-1;
for (i=0;i<out_channels;i++) {
int m;
OPUS_CLEAR(&out[i*hstride], height);
for (m=0;m<in_channels;m++) {
int j;
for (j=0;j<height;j++) {
/* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
}
}
}
}
#define MAX_CONV2D_INPUTS 8192
void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
{
int i;
const float *bias;
float in_buf[MAX_CONV2D_INPUTS];
int time_stride;
celt_assert(in != out);
time_stride = conv->in_channels*(height+conv->kheight-1);
celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
bias = conv->bias;
if (conv->kheight == 3 && conv->ktime == 3)
conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
else
conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
if (bias != NULL) {
for (i=0;i<conv->out_channels;i++) {
int j;
for (j=0;j<height;j++) out[i*hstride+j] += bias[i];
}
}
for (i=0;i<conv->out_channels;i++) {
RTCD_SUF(compute_activation_)(&out[i*hstride], &out[i*hstride], height, activation);
}
}
#ifdef GCC_POP_OPTIONS
#pragma GCC pop_options
#endif
#endif

View File

@ -0,0 +1,35 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define RTCD_ARCH c
#include "nnet_arch.h"

106959
src/libs/opus/dnn/nolace_data.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,331 @@
/* Auto generated from checkpoint nolace_small.pth (sha1: 953bf5854e1a33e8892da48a29b19aff3a272902) */
#ifndef NOLACE_DATA_H
#define NOLACE_DATA_H
#include "nnet.h"
#define NOLACE_PREEMPH 0.85f
#define NOLACE_FRAME_SIZE 80
#define NOLACE_OVERLAP_SIZE 40
#define NOLACE_NUM_FEATURES 93
#define NOLACE_PITCH_MAX 300
#define NOLACE_PITCH_EMBEDDING_DIM 64
#define NOLACE_NUMBITS_RANGE_LOW 50
#define NOLACE_NUMBITS_RANGE_HIGH 650
#define NOLACE_NUMBITS_EMBEDDING_DIM 8
#define NOLACE_COND_DIM 160
#define NOLACE_HIDDEN_FEATURE_DIM 96
#define NOLACE_NUMBITS_SCALE_0 1.0357311964035034f
#define NOLACE_NUMBITS_SCALE_1 1.735559105873108f
#define NOLACE_NUMBITS_SCALE_2 3.6004557609558105f
#define NOLACE_NUMBITS_SCALE_3 4.552478313446045f
#define NOLACE_NUMBITS_SCALE_4 5.932559490203857f
#define NOLACE_NUMBITS_SCALE_5 7.176970481872559f
#define NOLACE_NUMBITS_SCALE_6 8.114998817443848f
#define NOLACE_NUMBITS_SCALE_7 8.77063274383545f
#define NOLACE_PITCH_EMBEDDING_OUT_SIZE 64
#define NOLACE_FNET_CONV1_OUT_SIZE 96
#define NOLACE_FNET_CONV1_IN_SIZE 173
#define NOLACE_FNET_CONV1_STATE_SIZE (173 * (0))
#define NOLACE_FNET_CONV1_DELAY 0
#define NOLACE_FNET_CONV2_OUT_SIZE 160
#define NOLACE_FNET_CONV2_IN_SIZE 384
#define NOLACE_FNET_CONV2_STATE_SIZE (384 * (1))
#define NOLACE_FNET_CONV2_DELAY 0
#define NOLACE_FNET_TCONV_KERNEL_SIZE 4
#define NOLACE_FNET_TCONV_STRIDE 4
#define NOLACE_FNET_TCONV_IN_CHANNELS 160
#define NOLACE_FNET_TCONV_OUT_CHANNELS 160
#define NOLACE_FNET_GRU_OUT_SIZE 160
#define NOLACE_FNET_GRU_STATE_SIZE 160
#define NOLACE_CF1_FILTER_GAIN_A 0.690776f
#define NOLACE_CF1_FILTER_GAIN_B 0.000000f
#define NOLACE_CF1_LOG_GAIN_LIMIT 1.151293f
#define NOLACE_CF1_KERNEL_SIZE 16
#define NOLACE_CF1_LEFT_PADDING 8
#define NOLACE_CF1_FRAME_SIZE 80
#define NOLACE_CF1_OVERLAP_SIZE 40
#define NOLACE_CF1_IN_CHANNELS 1
#define NOLACE_CF1_OUT_CHANNELS 1
#define NOLACE_CF1_NORM_P 2
#define NOLACE_CF1_FEATURE_DIM 160
#define NOLACE_CF1_MAX_LAG 301
#define NOLACE_CF1_KERNEL_OUT_SIZE 16
#define NOLACE_CF1_GAIN_OUT_SIZE 1
#define NOLACE_CF1_GLOBAL_GAIN_OUT_SIZE 1
#define NOLACE_CF2_FILTER_GAIN_A 0.690776f
#define NOLACE_CF2_FILTER_GAIN_B 0.000000f
#define NOLACE_CF2_LOG_GAIN_LIMIT 1.151293f
#define NOLACE_CF2_KERNEL_SIZE 16
#define NOLACE_CF2_LEFT_PADDING 8
#define NOLACE_CF2_FRAME_SIZE 80
#define NOLACE_CF2_OVERLAP_SIZE 40
#define NOLACE_CF2_IN_CHANNELS 1
#define NOLACE_CF2_OUT_CHANNELS 1
#define NOLACE_CF2_NORM_P 2
#define NOLACE_CF2_FEATURE_DIM 160
#define NOLACE_CF2_MAX_LAG 301
#define NOLACE_CF2_KERNEL_OUT_SIZE 16
#define NOLACE_CF2_GAIN_OUT_SIZE 1
#define NOLACE_CF2_GLOBAL_GAIN_OUT_SIZE 1
#define NOLACE_AF1_FILTER_GAIN_A 1.381551f
#define NOLACE_AF1_FILTER_GAIN_B 0.000000f
#define NOLACE_AF1_SHAPE_GAIN 1.000000f
#define NOLACE_AF1_KERNEL_SIZE 16
#define NOLACE_AF1_FRAME_SIZE 80
#define NOLACE_AF1_LEFT_PADDING 15
#define NOLACE_AF1_OVERLAP_SIZE 40
#define NOLACE_AF1_IN_CHANNELS 1
#define NOLACE_AF1_OUT_CHANNELS 2
#define NOLACE_AF1_NORM_P 2
#define NOLACE_AF1_FEATURE_DIM 160
#define NOLACE_AF1_KERNEL_OUT_SIZE 32
#define NOLACE_AF1_GAIN_OUT_SIZE 2
#define NOLACE_TDSHAPE1_FEATURE_DIM 160
#define NOLACE_TDSHAPE1_FRAME_SIZE 80
#define NOLACE_TDSHAPE1_AVG_POOL_K 4
#define NOLACE_TDSHAPE1_INNOVATE 0
#define NOLACE_TDSHAPE1_POOL_AFTER 0
#define NOLACE_TDSHAPE1_ALPHA1_F_OUT_SIZE 80
#define NOLACE_TDSHAPE1_ALPHA1_F_IN_SIZE 160
#define NOLACE_TDSHAPE1_ALPHA1_F_STATE_SIZE (160 * (1))
#define NOLACE_TDSHAPE1_ALPHA1_F_DELAY 0
#define NOLACE_TDSHAPE1_ALPHA1_T_OUT_SIZE 80
#define NOLACE_TDSHAPE1_ALPHA1_T_IN_SIZE 21
#define NOLACE_TDSHAPE1_ALPHA1_T_STATE_SIZE (21 * (1))
#define NOLACE_TDSHAPE1_ALPHA1_T_DELAY 0
#define NOLACE_TDSHAPE1_ALPHA2_OUT_SIZE 80
#define NOLACE_TDSHAPE1_ALPHA2_IN_SIZE 80
#define NOLACE_TDSHAPE1_ALPHA2_STATE_SIZE (80 * (1))
#define NOLACE_TDSHAPE1_ALPHA2_DELAY 0
#define NOLACE_TDSHAPE2_FEATURE_DIM 160
#define NOLACE_TDSHAPE2_FRAME_SIZE 80
#define NOLACE_TDSHAPE2_AVG_POOL_K 4
#define NOLACE_TDSHAPE2_INNOVATE 0
#define NOLACE_TDSHAPE2_POOL_AFTER 0
#define NOLACE_TDSHAPE2_ALPHA1_F_OUT_SIZE 80
#define NOLACE_TDSHAPE2_ALPHA1_F_IN_SIZE 160
#define NOLACE_TDSHAPE2_ALPHA1_F_STATE_SIZE (160 * (1))
#define NOLACE_TDSHAPE2_ALPHA1_F_DELAY 0
#define NOLACE_TDSHAPE2_ALPHA1_T_OUT_SIZE 80
#define NOLACE_TDSHAPE2_ALPHA1_T_IN_SIZE 21
#define NOLACE_TDSHAPE2_ALPHA1_T_STATE_SIZE (21 * (1))
#define NOLACE_TDSHAPE2_ALPHA1_T_DELAY 0
#define NOLACE_TDSHAPE2_ALPHA2_OUT_SIZE 80
#define NOLACE_TDSHAPE2_ALPHA2_IN_SIZE 80
#define NOLACE_TDSHAPE2_ALPHA2_STATE_SIZE (80 * (1))
#define NOLACE_TDSHAPE2_ALPHA2_DELAY 0
#define NOLACE_TDSHAPE3_FEATURE_DIM 160
#define NOLACE_TDSHAPE3_FRAME_SIZE 80
#define NOLACE_TDSHAPE3_AVG_POOL_K 4
#define NOLACE_TDSHAPE3_INNOVATE 0
#define NOLACE_TDSHAPE3_POOL_AFTER 0
#define NOLACE_TDSHAPE3_ALPHA1_F_OUT_SIZE 80
#define NOLACE_TDSHAPE3_ALPHA1_F_IN_SIZE 160
#define NOLACE_TDSHAPE3_ALPHA1_F_STATE_SIZE (160 * (1))
#define NOLACE_TDSHAPE3_ALPHA1_F_DELAY 0
#define NOLACE_TDSHAPE3_ALPHA1_T_OUT_SIZE 80
#define NOLACE_TDSHAPE3_ALPHA1_T_IN_SIZE 21
#define NOLACE_TDSHAPE3_ALPHA1_T_STATE_SIZE (21 * (1))
#define NOLACE_TDSHAPE3_ALPHA1_T_DELAY 0
#define NOLACE_TDSHAPE3_ALPHA2_OUT_SIZE 80
#define NOLACE_TDSHAPE3_ALPHA2_IN_SIZE 80
#define NOLACE_TDSHAPE3_ALPHA2_STATE_SIZE (80 * (1))
#define NOLACE_TDSHAPE3_ALPHA2_DELAY 0
#define NOLACE_AF2_FILTER_GAIN_A 1.381551f
#define NOLACE_AF2_FILTER_GAIN_B 0.000000f
#define NOLACE_AF2_SHAPE_GAIN 1.000000f
#define NOLACE_AF2_KERNEL_SIZE 16
#define NOLACE_AF2_FRAME_SIZE 80
#define NOLACE_AF2_LEFT_PADDING 15
#define NOLACE_AF2_OVERLAP_SIZE 40
#define NOLACE_AF2_IN_CHANNELS 2
#define NOLACE_AF2_OUT_CHANNELS 2
#define NOLACE_AF2_NORM_P 2
#define NOLACE_AF2_FEATURE_DIM 160
#define NOLACE_AF2_KERNEL_OUT_SIZE 64
#define NOLACE_AF2_GAIN_OUT_SIZE 2
#define NOLACE_AF3_FILTER_GAIN_A 1.381551f
#define NOLACE_AF3_FILTER_GAIN_B 0.000000f
#define NOLACE_AF3_SHAPE_GAIN 1.000000f
#define NOLACE_AF3_KERNEL_SIZE 16
#define NOLACE_AF3_FRAME_SIZE 80
#define NOLACE_AF3_LEFT_PADDING 15
#define NOLACE_AF3_OVERLAP_SIZE 40
#define NOLACE_AF3_IN_CHANNELS 2
#define NOLACE_AF3_OUT_CHANNELS 2
#define NOLACE_AF3_NORM_P 2
#define NOLACE_AF3_FEATURE_DIM 160
#define NOLACE_AF3_KERNEL_OUT_SIZE 64
#define NOLACE_AF3_GAIN_OUT_SIZE 2
#define NOLACE_AF4_FILTER_GAIN_A 1.381551f
#define NOLACE_AF4_FILTER_GAIN_B 0.000000f
#define NOLACE_AF4_SHAPE_GAIN 1.000000f
#define NOLACE_AF4_KERNEL_SIZE 16
#define NOLACE_AF4_FRAME_SIZE 80
#define NOLACE_AF4_LEFT_PADDING 15
#define NOLACE_AF4_OVERLAP_SIZE 40
#define NOLACE_AF4_IN_CHANNELS 2
#define NOLACE_AF4_OUT_CHANNELS 1
#define NOLACE_AF4_NORM_P 2
#define NOLACE_AF4_FEATURE_DIM 160
#define NOLACE_AF4_KERNEL_OUT_SIZE 32
#define NOLACE_AF4_GAIN_OUT_SIZE 1
#define NOLACE_POST_CF1_OUT_SIZE 160
#define NOLACE_POST_CF1_IN_SIZE 160
#define NOLACE_POST_CF1_STATE_SIZE (160 * (1))
#define NOLACE_POST_CF1_DELAY 0
#define NOLACE_POST_CF2_OUT_SIZE 160
#define NOLACE_POST_CF2_IN_SIZE 160
#define NOLACE_POST_CF2_STATE_SIZE (160 * (1))
#define NOLACE_POST_CF2_DELAY 0
#define NOLACE_POST_AF1_OUT_SIZE 160
#define NOLACE_POST_AF1_IN_SIZE 160
#define NOLACE_POST_AF1_STATE_SIZE (160 * (1))
#define NOLACE_POST_AF1_DELAY 0
#define NOLACE_POST_AF2_OUT_SIZE 160
#define NOLACE_POST_AF2_IN_SIZE 160
#define NOLACE_POST_AF2_STATE_SIZE (160 * (1))
#define NOLACE_POST_AF2_DELAY 0
#define NOLACE_POST_AF3_OUT_SIZE 160
#define NOLACE_POST_AF3_IN_SIZE 160
#define NOLACE_POST_AF3_STATE_SIZE (160 * (1))
#define NOLACE_POST_AF3_DELAY 0
/* Container holding one LinearLayer per layer of the NoLACE model.
   The member names follow the nolace_<stage>_<layer> pattern (pitch embedding,
   fnet_*, cf1/cf2, af1..af4, tdshape1..3, post_*).
   NOTE(review): presumably populated by init_nolacelayers() from a WeightArray
   list - confirm against the definition of that function. */
typedef struct {
LinearLayer nolace_pitch_embedding;
LinearLayer nolace_fnet_conv1;
LinearLayer nolace_fnet_conv2;
LinearLayer nolace_fnet_tconv;
LinearLayer nolace_fnet_gru_input;
LinearLayer nolace_fnet_gru_recurrent;
LinearLayer nolace_cf1_kernel;
LinearLayer nolace_cf1_gain;
LinearLayer nolace_cf1_global_gain;
LinearLayer nolace_cf2_kernel;
LinearLayer nolace_cf2_gain;
LinearLayer nolace_cf2_global_gain;
LinearLayer nolace_af1_kernel;
LinearLayer nolace_af1_gain;
LinearLayer nolace_tdshape1_alpha1_f;
LinearLayer nolace_tdshape1_alpha1_t;
LinearLayer nolace_tdshape1_alpha2;
LinearLayer nolace_tdshape2_alpha1_f;
LinearLayer nolace_tdshape2_alpha1_t;
LinearLayer nolace_tdshape2_alpha2;
LinearLayer nolace_tdshape3_alpha1_f;
LinearLayer nolace_tdshape3_alpha1_t;
LinearLayer nolace_tdshape3_alpha2;
LinearLayer nolace_af2_kernel;
LinearLayer nolace_af2_gain;
LinearLayer nolace_af3_kernel;
LinearLayer nolace_af3_gain;
LinearLayer nolace_af4_kernel;
LinearLayer nolace_af4_gain;
LinearLayer nolace_post_cf1;
LinearLayer nolace_post_cf2;
LinearLayer nolace_post_af1;
LinearLayer nolace_post_af2;
LinearLayer nolace_post_af3;
} NOLACELayers;
/* Declaration only; the definition is not part of this header.
   NOTE(review): presumably fills `model` from the NULL-terminated `arrays` list and
   returns non-zero on failure - confirm against the definition. */
int init_nolacelayers(NOLACELayers *model, const WeightArray *arrays);
#endif /* NOLACE_DATA_H */

1419
src/libs/opus/dnn/osce.c Normal file

File diff suppressed because it is too large Load Diff

84
src/libs/opus/dnn/osce.h Normal file
View File

@ -0,0 +1,84 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_H
#define OSCE_H
#include "opus_types.h"
/*#include "osce_config.h"*/
#ifndef DISABLE_LACE
#include "lace_data.h"
#endif
#ifndef DISABLE_NOLACE
#include "nolace_data.h"
#endif
#include "nndsp.h"
#include "nnet.h"
#include "osce_structs.h"
#include "structs.h"
/* Identifiers of the enhancement methods. OSCE_METHOD_LACE / OSCE_METHOD_NOLACE
   are only defined when the corresponding model has not been disabled at build
   time (DISABLE_LACE / DISABLE_NOLACE). */
#define OSCE_METHOD_NONE 0
#ifndef DISABLE_LACE
#define OSCE_METHOD_LACE 1
#endif
#ifndef DISABLE_NOLACE
#define OSCE_METHOD_NOLACE 2
#endif
/* Default method selection, in order of preference: NoLACE, then LACE, then none.
   OSCE_MAX_RNN_NEURONS is set to the *_FNET_GRU_STATE_SIZE of the selected model
   (0 when both models are disabled). */
#if !defined(DISABLE_NOLACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
#define OSCE_MAX_RNN_NEURONS NOLACE_FNET_GRU_STATE_SIZE
#elif !defined(DISABLE_LACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
#define OSCE_MAX_RNN_NEURONS LACE_FNET_GRU_STATE_SIZE
#else
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
#define OSCE_MAX_RNN_NEURONS 0
#endif
/* API */
/* Enhancement entry point; the direction of every argument is given by the
   I / I/O annotation next to it. xq is both input and output.
   NOTE(review): presumably xq is overwritten in place with the enhanced signal -
   confirm against the definition in osce.c. */
void osce_enhance_frame(
OSCEModel *model, /* I OSCE model struct */
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I Decoder control */
opus_int16 xq[], /* I/O Decoded speech */
opus_int32 num_bits, /* I Size of SILK payload in bits */
int arch /* I Run-time architecture */
);
/* NOTE(review): presumably initializes hModel from the `len`-byte weight blob `data`;
   meaning of the return value to be confirmed against osce.c. */
int osce_load_models(OSCEModel *hModel, const void *data, int len);
/* NOTE(review): presumably resets the OSCE state for the given OSCE_METHOD_* value -
   confirm against osce.c. */
void osce_reset(silk_OSCE_struct *hOSCE, int method);
#endif

View File

@ -0,0 +1,60 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_CONFIG
#define OSCE_CONFIG
/* Number of past signal samples kept between frames (size of the signal history buffer) */
#define OSCE_FEATURES_MAX_HISTORY 350
/* Length of one feature vector: 64 + 18 + 5 + 5 + 1, i.e. the sum of the *_LENGTH values below */
#define OSCE_FEATURE_DIM 93
/* Maximum number of 80-sample feature frames handled per call */
#define OSCE_MAX_FEATURE_FRAMES 4
#define OSCE_CLEAN_SPEC_NUM_BANDS 64
#define OSCE_NOISY_SPEC_NUM_BANDS 18
/* Lag value reported for frames that carry no pitch information */
#define OSCE_NO_PITCH_VALUE 7
#define OSCE_PREEMPH 0.85f
/* Number of hangover frames; 0 disables the pitch hangover */
#define OSCE_PITCH_HANGOVER 0
/* Layout of one feature vector: every section is described by its start offset and its length.
   [0, 64) clean spectrum | [64, 82) noisy cepstrum | [82, 87) auto-correlation |
   [87, 92) LTP coefficients | [92, 93) log gain */
#define OSCE_CLEAN_SPEC_START 0
#define OSCE_CLEAN_SPEC_LENGTH 64
#define OSCE_NOISY_CEPSTRUM_START 64
#define OSCE_NOISY_CEPSTRUM_LENGTH 18
#define OSCE_ACORR_START 82
#define OSCE_ACORR_LENGTH 5
#define OSCE_LTP_START 87
#define OSCE_LTP_LENGTH 5
#define OSCE_LOG_GAIN_START 92
#define OSCE_LOG_GAIN_LENGTH 1
#endif

View File

@ -0,0 +1,454 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define OSCE_SPEC_WINDOW_SIZE 320
#define OSCE_SPEC_NUM_FREQS 161
/* Debug switches (both disabled by default):
   WRITE_FEATURES dumps every assembled feature vector to a file,
   DEBUG_PRINT    prints intermediate results to stdout.
   The switch name must be DEBUG_PRINT because that is the macro the code below tests. */
/*#define WRITE_FEATURES*/
/*#define DEBUG_PRINT*/
/*******/
#include "stack_alloc.h"
#include "osce_features.h"
#include "kiss_fft.h"
#include "os_support.h"
#include "osce.h"
#include "freq.h"
/* stdio/stdlib are only needed by the debug code paths */
#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
#include <stdio.h>
#include <stdlib.h>
#endif
/* Center bin of each of the 64 bands of the filterbank used for the clean
   (LPC based) spectrum. The values index the 161-bin one-sided spectrum, hence the
   last center is bin 160. Passed to apply_filterbank(). */
static const int center_bins_clean[64] = {
0, 2, 5, 8, 10, 12, 15, 18,
20, 22, 25, 28, 30, 33, 35, 38,
40, 42, 45, 48, 50, 52, 55, 58,
60, 62, 65, 68, 70, 73, 75, 78,
80, 82, 85, 88, 90, 92, 95, 98,
100, 102, 105, 108, 110, 112, 115, 118,
120, 122, 125, 128, 130, 132, 135, 138,
140, 142, 145, 148, 150, 152, 155, 160
};
/* Center bin of each of the 18 bands of the filterbank used for the noisy
   (decoded signal) spectrum. The spacing grows with the bin index; the last
   center is bin 160. Passed to apply_filterbank(). */
static const int center_bins_noisy[18] = {
0, 4, 8, 12, 16, 20, 24, 28,
32, 40, 48, 56, 64, 80, 96, 112,
136, 160
};
/* Per-band weight that apply_filterbank() multiplies onto every contribution to
   the corresponding band of the 64-band (clean spectrum) filterbank.
   NOTE(review): presumably normalization factors derived from the band widths -
   confirm against the script that generated the table. */
static const float band_weights_clean[64] = {
0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
};
/* Per-band weight that apply_filterbank() multiplies onto every contribution to
   the corresponding band of the 18-band (noisy spectrum) filterbank.
   NOTE(review): presumably normalization factors derived from the band widths -
   confirm against the script that generated the table. */
static const float band_weights_noisy[18] = {
0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
0.041666666667f, 0.080000000000f
};
/* Symmetric 320-tap analysis window. It is used to window the signal before the
   spectrum computation and its rising half (first 160 taps) serves as the fade-in
   curve of osce_cross_fade_10ms(). The table is only ever read, so it is declared
   const.
   NOTE(review): the leading values match sin(pi * (n + 0.5) / 320) - confirm against
   the generator of the table. */
static const float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
    0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
    0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
    0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
    0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
    0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
    0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
    0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
    0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
    0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
    0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
    0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
    0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
    0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
    0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
    0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
    0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
    0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
    0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
    0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
    0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
    0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
    0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
    0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
    0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
    0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
    0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
    0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
    0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
    0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
    0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
    0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
    0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
    0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
    0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
    0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
    0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
    0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
    0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
    0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
    0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
    0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
    0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
    0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
    0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
    0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
    0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
    0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
    0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
    0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
    0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
    0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
    0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
    0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
    0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
    0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
    0.423112513073f,  0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
    0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
    0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
    0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
    0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
    0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
    0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
    0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
    0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
};
/* Accumulate the bins of x_in into num_bands overlapping bands.
 *
 * x_out        O  num_bands band values (must not be the same pointer as x_in)
 * x_in         I  input bins; bins center_bins[0] .. center_bins[num_bands - 1] are read
 * center_bins  I  center bin index of every band
 * band_weights I  weight multiplied onto every contribution to a band
 * num_bands    I  number of bands
 *
 * A bin located between two neighbouring centers is shared linearly between the
 * two bands: the lower band gets the fraction `frac`, the upper band `1 - frac`.
 * The bin sitting on the last center goes entirely into the last band.
 */
static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
{
    const int last_band = num_bands - 1;
    int band;

    celt_assert(x_in != x_out);

    x_out[0] = 0;
    for (band = 0; band < last_band; band++)
    {
        const int lo = center_bins[band];
        const int hi = center_bins[band + 1];
        int bin;

        /* the upper band starts accumulating here; the lower one was cleared in the previous round */
        x_out[band + 1] = 0;
        for (bin = lo; bin < hi; bin++)
        {
            const float frac = (float) (hi - bin) / (hi - lo);

            x_out[band]     += band_weights[band] * frac * x_in[bin];
            x_out[band + 1] += band_weights[band + 1] * (1 - frac) * x_in[bin];
        }
    }
    x_out[last_band] += band_weights[last_band] * x_in[center_bins[last_band]];

#ifdef DEBUG_PRINT
    for (band = 0; band < num_bands; band++)
    {
        printf("band[%d]: %f\n", band, x_out[band]);
    }
#endif
}
static void mag_spec_320_onesided(float *out, float *in)
{
celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
int k;
forward_transform(buffer, in);
for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
{
out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
#ifdef DEBUG_PRINT
printf("magspec[%d]: %f\n", k, out[k]);
#endif
}
}
/* Scaled log spectrum in OSCE_CLEAN_SPEC_NUM_BANDS bands derived from LPC coefficients.
 *
 * spec       O  OSCE_CLEAN_SPEC_NUM_BANDS values
 * a_q12      I  lpc_order coefficients in Q12
 * lpc_order  I  number of coefficients
 *
 * Steps: build the sequence 1, -a[0], -a[1], ... (zero padded to 320 taps), take
 * its magnitude spectrum, invert it bin by bin, collect the bins into bands and
 * finally apply 0.3 * log().
 */
static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
{
    float work[OSCE_SPEC_WINDOW_SIZE] = {0};
    int idx;

    /* leading 1 followed by the negated coefficients, converted from Q12 */
    work[0] = 1;
    for (idx = 1; idx <= lpc_order; idx++)
    {
        work[idx] = - (float)a_q12[idx - 1] / (1U << 12);
    }

    /* magnitude spectrum, computed in place, then inverted (the offset avoids a division by zero) */
    mag_spec_320_onesided(work, work);
    for (idx = 0; idx < OSCE_SPEC_NUM_FREQS; idx++)
    {
        work[idx] = 1.f / (work[idx] + 1e-9f);
    }

    /* band accumulation */
    apply_filterbank(spec, work, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);

    /* log domain and scaling */
    for (idx = 0; idx < OSCE_CLEAN_SPEC_NUM_BANDS; idx++)
    {
        spec[idx] = 0.3f * log(spec[idx] + 1e-9f);
    }
}
static void calculate_cepstrum(float *cepstrum, float *signal)
{
float buffer[OSCE_SPEC_WINDOW_SIZE];
float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
int n;
celt_assert(cepstrum != signal)
for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
{
buffer[n] = osce_window[n] * signal[n];
}
/* calculate magnitude spectrum */
mag_spec_320_onesided(buffer, buffer);
/* accumulate bands */
apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
/* log domain conversion */
for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
{
spec[n] = log(spec[n] + 1e-9f);
#ifdef DEBUG_PRINT
printf("logspec[%d]: %f\n", n, spec[n]);
#endif
}
/* DCT-II (orthonormal) */
celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
dct(cepstrum, spec);
}
/* Normalized cross-correlation between the current 80-sample subframe and the
 * signal delayed by lag - 2 ... lag + 2 samples.
 *
 * acorr   O  5 values; acorr[k + 2] belongs to the delay lag - k, k = -2 .. 2
 * signal  I  start of the current subframe; samples signal[-lag - 2] .. signal[79]
 *            are read, so sufficient history must precede the pointer
 * lag     I  pitch lag in samples
 *
 * Each value is xy / sqrt(xx * yy + 1e-9), where xx is the energy of the subframe,
 * yy the energy of the delayed segment and xy their inner product.
 */
static void calculate_acorr(float *acorr, float *signal, int lag)
{
    int n, k;
    float xx = 0;

    celt_assert(acorr != signal);

    /* the subframe energy is the same for all five delays: compute it only once */
    for (n = 0; n < 80; n++)
    {
        xx += signal[n] * signal[n];
    }

    for (k = -2; k <= 2; k++)
    {
        const int shift = k - lag;
        float xy = 0;
        float yy = 0;

        for (n = 0; n < 80; n++)
        {
            yy += signal[n + shift] * signal[n + shift];
            xy += signal[n] * signal[n + shift];
        }
        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
    }
}
/* Map the decoder pitch lag to the lag used for feature extraction and update the
 * pitch related fields of the feature state.
 *
 * psFeatures  I/O  feature state (last_lag, last_type, pitch_hangover_count)
 * lag         I    pitch lag of the current subframe
 * type        I    signal type of the current frame
 *
 * Returns lag for voiced frames. For all other frames OSCE_NO_PITCH_VALUE is
 * returned, unless the hangover logic is compiled in (OSCE_HANGOVER_BUGFIX) and
 * active, in which case the last voiced lag is repeated.
 */
static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
{
    int new_lag;
    int modulus;

    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
#ifdef OSCE_HANGOVER_BUGFIX
#define TESTBIT 1
#else
#define TESTBIT 0
#endif

    /* guard against a modulo by zero when the hangover length is 0 */
    modulus = OSCE_PITCH_HANGOVER ? OSCE_PITCH_HANGOVER : 1;

    if (type == TYPE_VOICED)
    {
        /* voiced frame: take the lag as is and remember it */
        new_lag = lag;
        psFeatures->last_lag = lag;
        psFeatures->pitch_hangover_count = 0;
    }
    else if (TESTBIT && psFeatures->last_type == TYPE_VOICED)
    {
        /* first frame after a voiced one: enter hangover if there is room for it */
        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
        {
            new_lag = psFeatures->last_lag;
            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
        }
        else
        {
            new_lag = OSCE_NO_PITCH_VALUE;
        }
    }
    else if (TESTBIT && psFeatures->pitch_hangover_count)
    {
        /* continue hangover */
        new_lag = psFeatures->last_lag;
        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
    }
    else
    {
        /* frame without pitch and without hangover */
        new_lag = OSCE_NO_PITCH_VALUE;
        psFeatures->pitch_hangover_count = 0;
    }

    /* buffer update */
    psFeatures->last_type = type;

    /* with the current setup this should never happen (but who knows...) */
    celt_assert(new_lag);

    return new_lag;
}
/* Assemble the OSCE input features for all subframes of the current frame.
 *
 * One feature vector of OSCE_FEATURE_DIM floats is written per 80-sample subframe:
 * clean spectrum (from the LPC coefficients), noisy cepstrum (from the decoded
 * signal), auto-correlation around the pitch lag, LTP coefficients and log gain.
 * Spectrum and cepstrum are only computed for even subframes and copied from the
 * preceding subframe otherwise. The signal history kept in the decoder state is
 * updated at the end.
 */
void osce_calculate_features(
    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
    float                       *features,                      /* O    input features                              */
    float                       *numbits,                       /* O    numbits and smoothed numbits                */
    int                         *periods,                       /* O    pitch lags on subframe basis                */
    const opus_int16            xq[],                           /* I    Decoded speech                              */
    opus_int32                  num_bits                        /* I    Size of SILK payload in bits                */
)
{
    int num_subframes, num_samples;
    float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
    float *frame, *pfeatures;
    OSCEFeatureState *psFeatures;
    int i, n, k;
#ifdef WRITE_FEATURES
    static FILE *f_feat = NULL;
    if (f_feat == NULL)
    {
        f_feat = fopen("assembled_features.f32", "wb");
    }
#endif

    memset(buffer, 0, sizeof(buffer));

    num_subframes = psDec->nb_subfr;
    num_samples = num_subframes * 80;
    psFeatures = &psDec->osce.features;

    /* smooth bit count */
    psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
    numbits[0] = num_bits;
    numbits[1] = psFeatures->numbits_smooth;

    /* working buffer: signal history followed by the new frame scaled by 2^-15 */
    for (n = 0; n < num_samples; n++)
    {
        buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
    }
    OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);

    for (k = 0; k < num_subframes; k++)
    {
        pfeatures = features + k * OSCE_FEATURE_DIM;
        frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];

        /* precaution: clear the whole feature vector (memset counts bytes, not floats) */
        memset(pfeatures, 0, OSCE_FEATURE_DIM * sizeof(*pfeatures));

        /* clean spectrum from lpcs (update every other frame) */
        if (k % 2 == 0)
        {
            calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
        }
        else
        {
            OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
        }

        /* noisy cepstrum from signal (update every other frame); the 320-sample
           analysis segment starts 160 samples before the current subframe */
        if (k % 2 == 0)
        {
            calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
        }
        else
        {
            OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
        }

        /* pitch hangover and zero value replacement */
        periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);

        /* auto-correlation around pitch lag */
        calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);

        /* ltp */
        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
        for (i = 0; i < OSCE_LTP_LENGTH; i++)
        {
            pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
        }

        /* frame gain */
        pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);

#ifdef WRITE_FEATURES
        fwrite(pfeatures, sizeof(*pfeatures), OSCE_FEATURE_DIM, f_feat);
#endif
    }

    /* buffer update: keep the most recent OSCE_FEATURES_MAX_HISTORY samples */
    OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
}
/* Cross-fade from x_in to x_enhanced over the first 160 samples.
 *
 * x_enhanced  I/O  signal that is faded in; receives the result
 * x_in        I    signal that is faded out
 * length      I    number of samples available in both buffers (at least 160)
 *
 * The rising half of osce_window is used as fade-in curve; samples from index 160
 * onwards are left untouched.
 */
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
{
    int n = 0;

    celt_assert(length >= 160);

    while (n < 160)
    {
        x_enhanced[n] = osce_window[n] * x_enhanced[n] + (1.f - osce_window[n]) * x_in[n];
        n++;
    }
}

View File

@ -0,0 +1,50 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_FEATURES_H
#define OSCE_FEATURES_H
#include "structs.h"
#include "opus_types.h"
#define OSCE_NUMBITS_BUGFIX
/* Computes the OSCE input features of the current frame. The direction of every
   argument is given by the I / O / I/O annotation next to it: features, numbits
   and periods are outputs, the decoder state is updated. */
void osce_calculate_features(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I Decoder control */
float *features, /* O input features */
float *numbits, /* O numbits and smoothed numbits */
int *periods, /* O pitch lags on subframe basis */
const opus_int16 xq[], /* I Decoded speech */
opus_int32 num_bits /* I Size of SILK payload in bits */
);
/* Cross-fades the first 160 samples of x_enhanced with x_in (result in x_enhanced);
   length is the number of available samples and must be at least 160. */
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
#endif

View File

@ -0,0 +1,125 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_STRUCTS_H
#define OSCE_STRUCTS_H
#include "opus_types.h"
#include "osce_config.h"
#ifndef DISABLE_LACE
#include "lace_data.h"
#endif
#ifndef DISABLE_NOLACE
#include "nolace_data.h"
#endif
#include "nndsp.h"
#include "nnet.h"
/* feature calculation */
/* State carried from frame to frame by the feature calculation. */
typedef struct {
/* recursively smoothed payload size (0.9 * old value + 0.1 * new value) */
float numbits_smooth;
/* counter of the pitch hangover logic */
int pitch_hangover_count;
/* pitch lag of the most recent voiced subframe */
int last_lag;
/* signal type seen in the previous call of the pitch post-processing */
int last_type;
/* most recent OSCE_FEATURES_MAX_HISTORY samples of the decoded signal */
float signal_history[OSCE_FEATURES_MAX_HISTORY];
/* NOTE(review): not used by the feature code itself; presumably a reset/cross-fade
   flag maintained by osce.c - confirm */
int reset;
} OSCEFeatureState;
#ifndef DISABLE_LACE
/* LACE */
/* Run-time state of the LACE model: buffers of the feature net, the states of the
   two AdaComb and one AdaConv modules and one memory value each for pre-emphasis
   and de-emphasis.
   NOTE(review): the exact role of every member is defined by osce.c - confirm there. */
typedef struct {
float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
float feature_net_gru_state[LACE_COND_DIM];
AdaCombState cf1_state;
AdaCombState cf2_state;
AdaConvState af1_state;
float preemph_mem;
float deemph_mem;
} LACEState;
/* LACE model: the layer parameters plus a window table of LACE_OVERLAP_SIZE taps.
   NOTE(review): window is presumably the overlap window used by the adaptive
   filtering modules - confirm against osce.c. */
typedef struct
{
LACELayers layers;
float window[LACE_OVERLAP_SIZE];
} LACE;
#endif /* #ifndef DISABLE_LACE */
#ifndef DISABLE_NOLACE
/* NoLACE */
/* Run-time state of the NoLACE model: buffers of the feature net, one state vector
   of NOLACE_COND_DIM floats per post_* layer, the states of the AdaComb (cf1, cf2),
   AdaConv (af1..af4) and AdaShape (tdshape1..3) modules and one memory value each
   for pre-emphasis and de-emphasis.
   NOTE(review): the exact role of every member is defined by osce.c - confirm there. */
typedef struct {
float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
float feature_net_gru_state[NOLACE_COND_DIM];
float post_cf1_state[NOLACE_COND_DIM];
float post_cf2_state[NOLACE_COND_DIM];
float post_af1_state[NOLACE_COND_DIM];
float post_af2_state[NOLACE_COND_DIM];
float post_af3_state[NOLACE_COND_DIM];
AdaCombState cf1_state;
AdaCombState cf2_state;
AdaConvState af1_state;
AdaConvState af2_state;
AdaConvState af3_state;
AdaConvState af4_state;
AdaShapeState tdshape1_state;
AdaShapeState tdshape2_state;
AdaShapeState tdshape3_state;
float preemph_mem;
float deemph_mem;
} NoLACEState;
/* NoLACE model: the layer parameters plus a window table.
   NOTE(review): the window is sized with LACE_OVERLAP_SIZE, a macro of the LACE
   model. lace_data.h is only included when DISABLE_LACE is not defined, so a build
   with DISABLE_LACE but without DISABLE_NOLACE may not see this macro - confirm,
   and check whether a NoLACE specific overlap size should be used instead. */
typedef struct {
NOLACELayers layers;
float window[LACE_OVERLAP_SIZE];
} NoLACE;
#endif /* #ifndef DISABLE_NOLACE */
/* OSCEModel */
/* All OSCE models that are compiled in, together with a flag.
   NOTE(review): `loaded` presumably tells whether the model data has been
   initialized - confirm against osce.c. */
typedef struct {
int loaded;
#ifndef DISABLE_LACE
LACE lace;
#endif
#ifndef DISABLE_NOLACE
NoLACE nolace;
#endif
} OSCEModel;
/* Run-time state of the active model. This is a union: the LACE and the NoLACE
   state share the same storage, so only one of them is valid at a time.
   NOTE(review): with both DISABLE_LACE and DISABLE_NOLACE defined the union has no
   members, which standard C does not allow - confirm that this configuration is
   never built or is handled elsewhere. */
typedef union {
#ifndef DISABLE_LACE
LACEState lace;
#endif
#ifndef DISABLE_NOLACE
NoLACEState nolace;
#endif
} OSCEState;
#endif

View File

@ -0,0 +1,238 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include <stdlib.h>
#include "nnet.h"
#include "os_support.h"
/* Number of weights accounted for per sparse block: linear_init() expects a
   sparse weight array to hold SPARSE_BLOCK_SIZE*total_blocks elements. */
#define SPARSE_BLOCK_SIZE 32
/* Decode the record header located at *data and describe its payload in *array.

   On success *array receives the header's name, type and size plus a pointer
   to the bytes following the WEIGHT_BLOCK_SIZE-byte header; *data and *len are
   advanced past the header and block_size payload bytes, and the payload size
   is returned.

   Returns -1 (leaving *data, *len and *array untouched) when fewer than
   WEIGHT_BLOCK_SIZE bytes remain or when the header fails validation. */
int parse_record(const void **data, int *len, WeightArray *array) {
  WeightHead *head = (WeightHead *)*data;
  unsigned char *start = (unsigned char*)(*data);
  int record_bytes;

  if (*len < WEIGHT_BLOCK_SIZE) return -1;
  /* Header sanity checks, evaluated in this order:
     declared size fits in the block, block fits in the remaining input,
     name is NUL-terminated, size is non-negative. */
  if (head->block_size < head->size
      || head->block_size > *len-WEIGHT_BLOCK_SIZE
      || head->name[sizeof(head->name)-1] != 0
      || head->size < 0) {
    return -1;
  }

  array->name = head->name;
  array->type = head->type;
  array->size = head->size;
  array->data = (void*)(start+WEIGHT_BLOCK_SIZE);

  record_bytes = head->block_size+WEIGHT_BLOCK_SIZE;
  *data = (void*)(start + record_bytes);
  *len -= record_bytes;
  return array->size;
}
/* Parse a blob of concatenated weight records into a newly allocated array.

   On success *list points to an array of the parsed entries followed by one
   terminating entry whose name is NULL, and the number of parsed entries is
   returned. The caller releases the array with opus_free().

   On failure (a record that parse_record() rejects or that has size 0, or an
   allocation failure) the array is freed, *list is set to NULL and -1 is
   returned. */
int parse_weights(WeightArray **list, const void *data, int len)
{
  static const WeightArray terminator = {NULL, 0, 0, 0};
  int nb_arrays=0;
  int capacity=20;
  *list = opus_alloc(capacity*sizeof(WeightArray));
  if (*list == NULL) return -1;
  while (len > 0) {
    int ret;
    WeightArray array = {NULL, 0, 0, 0};
    ret = parse_record(&data, &len, &array);
    if (ret <= 0) goto fail;
    if (nb_arrays+1 >= capacity) {
      /* Make sure there's room for the ending NULL element too.
         Grow through a temporary so the old buffer is not lost if
         reallocation fails. */
      WeightArray *grown;
      capacity = capacity*3/2;
      grown = opus_realloc(*list, capacity*sizeof(WeightArray));
      if (grown == NULL) goto fail;
      *list = grown;
    }
    (*list)[nb_arrays++] = array;
  }
  /* Fully initialize the terminator so lookups that land on it never read
     indeterminate size/data fields. */
  (*list)[nb_arrays] = terminator;
  return nb_arrays;
fail:
  opus_free(*list);
  *list = NULL;
  return -1;
}
/* Linear search of a NULL-name-terminated WeightArray list.
   Returns a pointer to the first entry whose name equals `name`, or to the
   terminating entry (name == NULL) when nothing matches; never NULL. */
static const void *find_array_entry(const WeightArray *arrays, const char *name) {
  const WeightArray *entry;
  for (entry = arrays; entry->name != NULL; entry++) {
    if (strcmp(entry->name, name) == 0) break;
  }
  return entry;
}
/* Look up a required array: returns its data pointer only when an entry named
   `name` exists and its size equals `size`; otherwise returns NULL. */
static const void *find_array_check(const WeightArray *arrays, const char *name, int size) {
  const WeightArray *entry = find_array_entry(arrays, name);
  if (entry->name == NULL) return NULL;
  if (entry->size != size) return NULL;
  return entry->data;
}
/* Look up an optional array.
   - entry missing:            returns NULL, *error = 0
   - entry present, bad size:  returns NULL, *error = 1
   - entry present, size ok:   returns its data pointer, *error = 0 */
static const void *opt_array_check(const WeightArray *arrays, const char *name, int size, int *error) {
  const WeightArray *entry = find_array_entry(arrays, name);
  if (entry->name == NULL) {
    *error = 0;
    return NULL;
  }
  if (entry->size != size) {
    *error = 1;
    return NULL;
  }
  *error = 0;
  return entry->data;
}
static const void *find_idx_check(const WeightArray *arrays, const char *name, int nb_in, int nb_out, int *total_blocks) {
int remain;
const int *idx;
const WeightArray *a = find_array_entry(arrays, name);
*total_blocks = 0;
if (a == NULL) return NULL;
idx = a->data;
remain = a->size/sizeof(int);
while (remain > 0) {
int nb_blocks;
int i;
nb_blocks = *idx++;
if (remain < nb_blocks+1) return NULL;
for (i=0;i<nb_blocks;i++) {
int pos = *idx++;
if (pos+3 >= nb_in || (pos&0x3)) return NULL;
}
nb_out -= 8;
remain -= nb_blocks+1;
*total_blocks += nb_blocks;
}
if (nb_out != 0) return NULL;
return a->data;
}
/* Bind a LinearLayer to named arrays from `arrays`.

   Each name argument may be NULL, in which case the matching layer pointer
   stays NULL. bias, subias, weights, weights_idx, diag and scale are required
   once named: a missing entry or a size mismatch fails the call.
   float_weights is optional: a missing entry is accepted, a size mismatch is
   not. When weights_idx is given, the weight arrays are sized from the number
   of sparse blocks instead of nb_inputs*nb_outputs. When weights is given, a
   scale array is looked up as well.

   Returns 0 on success, 1 on failure. */
int linear_init(LinearLayer *layer, const WeightArray *arrays,
  const char *bias,
  const char *subias,
  const char *weights,
  const char *float_weights,
  const char *weights_idx,
  const char *diag,
  const char *scale,
  int nb_inputs,
  int nb_outputs)
{
  int err;
  layer->bias = NULL;
  layer->subias = NULL;
  layer->weights = NULL;
  layer->float_weights = NULL;
  layer->weights_idx = NULL;
  layer->diag = NULL;
  layer->scale = NULL;
  if (bias != NULL) {
    if ((layer->bias = find_array_check(arrays, bias, nb_outputs*sizeof(layer->bias[0]))) == NULL) return 1;
  }
  if (subias != NULL) {
    if ((layer->subias = find_array_check(arrays, subias, nb_outputs*sizeof(layer->subias[0]))) == NULL) return 1;
  }
  if (weights_idx != NULL) {
    int total_blocks;
    if ((layer->weights_idx = find_idx_check(arrays, weights_idx, nb_inputs, nb_outputs, &total_blocks)) == NULL) return 1;
    if (weights != NULL) {
      if ((layer->weights = find_array_check(arrays, weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->weights[0]))) == NULL) return 1;
    }
    if (float_weights != NULL) {
      layer->float_weights = opt_array_check(arrays, float_weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->float_weights[0]), &err);
      if (err) return 1;
    }
  } else {
    if (weights != NULL) {
      if ((layer->weights = find_array_check(arrays, weights, nb_inputs*nb_outputs*sizeof(layer->weights[0]))) == NULL) return 1;
    }
    if (float_weights != NULL) {
      layer->float_weights = opt_array_check(arrays, float_weights, nb_inputs*nb_outputs*sizeof(layer->float_weights[0]), &err);
      if (err) return 1;
    }
  }
  if (diag != NULL) {
    if ((layer->diag = find_array_check(arrays, diag, nb_outputs*sizeof(layer->diag[0]))) == NULL) return 1;
  }
  if (weights != NULL) {
    /* The scale lookup is driven by the presence of `weights`; refuse a
       missing scale name rather than handing NULL to strcmp(). */
    if (scale == NULL) return 1;
    if ((layer->scale = find_array_check(arrays, scale, nb_outputs*sizeof(layer->scale[0]))) == NULL) return 1;
  }
  layer->nb_inputs = nb_inputs;
  layer->nb_outputs = nb_outputs;
  return 0;
}
/* Bind a Conv2dLayer to named arrays from `arrays`.

   bias (when named) is required and must hold out_channels elements.
   float_weights (when named) may be absent, but if present must hold
   in_channels*out_channels*ktime*kheight elements.

   Returns 0 on success, 1 on failure. The dimension fields of `layer` are
   only written on success. */
int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
  const char *bias,
  const char *float_weights,
  int in_channels,
  int out_channels,
  int ktime,
  int kheight)
{
  int size_mismatch;

  layer->bias = NULL;
  layer->float_weights = NULL;

  if (bias != NULL) {
    layer->bias = find_array_check(arrays, bias, out_channels*sizeof(layer->bias[0]));
    if (layer->bias == NULL) return 1;
  }
  if (float_weights != NULL) {
    layer->float_weights = opt_array_check(arrays, float_weights, in_channels*out_channels*ktime*kheight*sizeof(layer->float_weights[0]), &size_mismatch);
    if (size_mismatch) return 1;
  }

  layer->kheight = kheight;
  layer->ktime = ktime;
  layer->out_channels = out_channels;
  layer->in_channels = in_channels;
  return 0;
}
/* Disabled (#if 0) stand-alone driver: maps "weights_blob.bin", runs
   parse_weights() on it and prints each array's name and size.
   Note: the results of stat(), open() and mmap() are not checked here. */
#if 0
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/stat.h>
#include <stdio.h>
int main()
{
int fd;
void *data;
int len;
int nb_arrays;
int i;
WeightArray *list;
struct stat st;
const char *filename = "weights_blob.bin";
stat(filename, &st);
len = st.st_size;
fd = open(filename, O_RDONLY);
data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
printf("size is %d\n", len);
nb_arrays = parse_weights(&list, data, len);
for (i=0;i<nb_arrays;i++) {
printf("found %s: size %d\n", list[i].name, list[i].size);
}
/* i == nb_arrays here, so this prints the terminating entry's name pointer. */
printf("%p\n", list[i].name);
opus_free(list);
munmap(data, len);
close(fd);
return 0;
}
#endif

View File

@ -0,0 +1,79 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include "pitchdnn.h"
#include "os_support.h"
#include "nnet.h"
#include "lpcnet_private.h"
/* Run one step of the pitch network.

   The IF features go through two dense layers; the xcorr features go through
   two conv2d layers (using st->xcorr_mem1 / st->xcorr_mem2). Both results are
   laid out in one buffer, reduced by a dense layer, fed to the GRU (state in
   st->gru_state) and expanded by a final linear dense layer.

   The return value is derived from the first 180 outputs: the index of the
   largest one is found, then an exp()-weighted average of the indices within
   +/-2 of it (clipped to [0, 179]) is scaled by 1/60 and offset by -1.5. */
float compute_pitchdnn(
  PitchDNNState *st,
  const float *if_features,
  const float *xcorr_features,
  int arch
  )
{
  float if_hidden[DENSE_IF_UPSAMPLER_1_OUT_SIZE];
  float merged[NB_XCORR_FEATURES + DENSE_IF_UPSAMPLER_2_OUT_SIZE];
  float reduced[DENSE_DOWNSAMPLER_OUT_SIZE];
  float conv_in[(NB_XCORR_FEATURES + 2)*8] = {0};
  float conv_mid[(NB_XCORR_FEATURES + 2)*8] = {0};
  float logits[DENSE_FINAL_UPSAMPLER_OUT_SIZE];
  PitchDNN *model = &st->model;
  int bin;
  int best=0;
  int first, last;
  float best_val=-1;
  float weighted=0;
  float total=0;

  /* IF branch: its output lands after the xcorr part of `merged`. */
  compute_generic_dense(&model->dense_if_upsampler_1, if_hidden, if_features, ACTIVATION_TANH, arch);
  compute_generic_dense(&model->dense_if_upsampler_2, &merged[NB_XCORR_FEATURES], if_hidden, ACTIVATION_TANH, arch);

  /* xcorr branch: inputs are copied in at offset 1 of the zeroed buffer. */
  OPUS_COPY(&conv_in[1], xcorr_features, NB_XCORR_FEATURES);
  compute_conv2d(&model->conv2d_1, &conv_mid[1], st->xcorr_mem1, conv_in, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH, arch);
  compute_conv2d(&model->conv2d_2, merged, st->xcorr_mem2, conv_mid, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH, arch);

  /* Shared trunk. */
  compute_generic_dense(&model->dense_downsampler, reduced, merged, ACTIVATION_TANH, arch);
  compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, reduced, arch);
  compute_generic_dense(&model->dense_final_upsampler, logits, st->gru_state, ACTIVATION_LINEAR, arch);

  /* Peak search over the first 180 outputs (values <= -1 never win). */
  for (bin=0;bin<180;bin++) {
    if (logits[bin] > best_val) {
      best = bin;
      best_val = logits[bin];
    }
  }

  /* Weighted average of the bin indices around the peak. */
  first = IMAX(0, best-2);
  last = IMIN(179, best+2);
  for (bin=first; bin<=last; bin++) {
    float p = exp(logits[bin]);
    weighted += p*bin;
    total += p;
  }
  return (1.f/60.f)*(weighted/total) - 1.5;
}
/* Zero the whole state, then (unless USE_WEIGHTS_FILE is defined) initialize
   the model from the built-in pitchdnn_arrays table. A non-zero result from
   init_pitchdnn() trips celt_assert(). */
void pitchdnn_init(PitchDNNState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
#ifndef USE_WEIGHTS_FILE
  ret = init_pitchdnn(&st->model, pitchdnn_arrays);
#endif
  celt_assert(ret == 0);
}
/* Initialize st->model from a weight blob of `len` bytes at `data`.
   The temporary array list built by parse_weights() is released before
   returning. Returns 0 on success, -1 if the blob cannot be parsed or the
   model cannot be initialized from it. */
int pitchdnn_load_model(PitchDNNState *st, const void *data, int len) {
  WeightArray *list;
  int ret;
  /* parse_weights() leaves *list NULL on failure; init_pitchdnn() must not
     be handed that. */
  if (parse_weights(&list, data, len) < 0 || list == NULL) return -1;
  ret = init_pitchdnn(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}

View File

@ -0,0 +1,34 @@
#ifndef PITCHDNN_H
#define PITCHDNN_H
typedef struct PitchDNN PitchDNN;
#include "pitchdnn_data.h"
/* Bounds of the pitch period range.
   NOTE(review): units are not stated here (presumably samples) -- confirm. */
#define PITCH_MIN_PERIOD 32
#define PITCH_MAX_PERIOD 256
/* Number of xcorr features: the width of the period range above (256-32 = 224). */
#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
/* Pitch network state: the model plus the buffers compute_pitchdnn() reads
   and updates through the layer routines. */
typedef struct {
PitchDNN model;
/* Handed to compute_generic_gru() as the GRU state, then read by the final dense layer. */
float gru_state[GRU_1_STATE_SIZE];
/* Memory buffers handed to compute_conv2d() for conv2d_1 and conv2d_2 respectively. */
float xcorr_mem1[(NB_XCORR_FEATURES + 2)*2];
float xcorr_mem2[(NB_XCORR_FEATURES + 2)*2*8];
/* NOTE(review): not referenced by compute_pitchdnn() in pitchdnn.c -- confirm whether it is still needed. */
float xcorr_mem3[(NB_XCORR_FEATURES + 2)*2*8];
} PitchDNNState;
/* Clears *st and, unless USE_WEIGHTS_FILE is defined, initializes the model
   from the built-in weight tables. */
void pitchdnn_init(PitchDNNState *st);
/* Initializes st->model from a weight blob of `len` bytes.
   Returns 0 on success and -1 otherwise. */
int pitchdnn_load_model(PitchDNNState *st, const void *data, int len);
/* Runs the pitch network on one set of IF and xcorr features, updating the
   state in *st, and returns the resulting float estimate. `arch` is forwarded
   to the layer routines. */
float compute_pitchdnn(
PitchDNNState *st,
const float *if_features,
const float *xcorr_features,
int arch
);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
/* Auto generated from checkpoint pitch_vsmallconv1.pth */
#ifndef PITCHDNN_DATA_H
#define PITCHDNN_DATA_H
#include "nnet.h"
#include "opus_types.h"
#define DENSE_IF_UPSAMPLER_1_OUT_SIZE 64
#define DENSE_IF_UPSAMPLER_2_OUT_SIZE 64
#define DENSE_DOWNSAMPLER_OUT_SIZE 64
#define DENSE_FINAL_UPSAMPLER_OUT_SIZE 192
#define GRU_1_OUT_SIZE 64
#define GRU_1_STATE_SIZE 64
#define PITCH_DNN_MAX_RNN_UNITS 64
/* Layer descriptors of the pitch network, as consumed by compute_pitchdnn():
   two dense layers for the IF features, two conv2d layers for the xcorr
   features, a dense downsampler, a GRU (input and recurrent parts) and a
   final dense layer. */
struct PitchDNN {
LinearLayer dense_if_upsampler_1;
LinearLayer dense_if_upsampler_2;
LinearLayer dense_downsampler;
LinearLayer dense_final_upsampler;
Conv2dLayer conv2d_1;
Conv2dLayer conv2d_2;
LinearLayer gru_1_input;
LinearLayer gru_1_recurrent;
};
/* Fills *model from the NULL-name-terminated `arrays` list.
   Callers in pitchdnn.c treat a return value of 0 as success. */
int init_pitchdnn(PitchDNN *model, const WeightArray *arrays);
#endif /* PITCHDNN_DATA_H */

53278
src/libs/opus/dnn/plc_data.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
/* Auto generated from checkpoint plc4ar_16.pth */
#ifndef PLC_DATA_H
#define PLC_DATA_H
#include "nnet.h"
#include "opus_types.h"
#define PLC_DENSE_IN_OUT_SIZE 128
#define PLC_DENSE_OUT_OUT_SIZE 20
#define PLC_GRU1_OUT_SIZE 192
#define PLC_GRU1_STATE_SIZE 192
#define PLC_GRU2_OUT_SIZE 192
#define PLC_GRU2_STATE_SIZE 192
#define PLC_MAX_RNN_UNITS 192
/* Layer descriptors of the PLC model: input and output dense layers and two
   GRUs, each split into an input part and a recurrent part.
   NOTE(review): presumably populated by init_plcmodel() -- confirm. */
typedef struct {
LinearLayer plc_dense_in;
LinearLayer plc_dense_out;
LinearLayer plc_gru1_input;
LinearLayer plc_gru1_recurrent;
LinearLayer plc_gru2_input;
LinearLayer plc_gru2_recurrent;
} PLCModel;
/* Initializes *model from the `arrays` list.
   NOTE(review): presumably returns 0 on success like init_pitchdnn() -- confirm. */
int init_plcmodel(PLCModel *model, const WeightArray *arrays);
#endif /* PLC_DATA_H */

389
src/libs/opus/dnn/vec.h Normal file
View File

@ -0,0 +1,389 @@
/* Copyright (c) 2018 Mozilla
2008-2011 Octasic Inc.
2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef VEC_H
#define VEC_H
#include "opus_types.h"
#include <math.h>
#include "arch.h"
#include "x86/x86_arch_macros.h"
#if defined(__AVX__) || defined(__SSE2__)
#include "vec_avx.h"
#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && !defined(DISABLE_NEON)
#include "vec_neon.h"
#else
#include "os_support.h"
/* Capacity of the on-stack quantized-input buffers declared by the cgemv
   kernels in this file; `cols` must not exceed it. */
#define MAX_INPUTS (2048)
/* Defined only on this generic path (neither the AVX/SSE2 nor the NEON header was included). */
#define NO_OPTIMIZATIONS
/* out = W * x for a matrix stored with stride col_stride between columns,
   processing 16 output rows at a time. `rows` is expected to be a multiple
   of 16; `out` is cleared first and then accumulated column by column. */
static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
  int row0, col, k;
  OPUS_CLEAR(out, rows);
  for (row0=0;row0<rows;row0+=16)
  {
    for (col=0;col<cols;col++)
    {
      const float * restrict wcol = &weights[col*col_stride + row0];
      float * restrict acc = &out[row0];
      const float xv = x[col];
      /* Add this column's contribution to the 16 rows of the group. */
      for (k=0;k<16;k++) acc[k] += wcol[k]*xv;
    }
  }
}
/* Same as sgemv16x1() but processing 8 output rows at a time; `rows` is
   expected to be a multiple of 8. */
static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
  int row0, col, k;
  OPUS_CLEAR(out, rows);
  for (row0=0;row0<rows;row0+=8)
  {
    for (col=0;col<cols;col++)
    {
      const float * restrict wcol = &weights[col*col_stride + row0];
      float * restrict acc = &out[row0];
      const float xv = x[col];
      /* Add this column's contribution to the 8 rows of the group. */
      for (k=0;k<8;k++) acc[k] += wcol[k]*xv;
    }
  }
}
/* Matrix-vector product dispatcher: uses the 16-row kernel when `rows` is a
   multiple of 16, the 8-row kernel when it is a multiple of 8, and a plain
   row-by-row loop otherwise. */
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
  int r, c;
  if ((rows&0xf) == 0) {
    sgemv16x1(out, weights, rows, cols, col_stride, x);
    return;
  }
  if ((rows&0x7) == 0) {
    sgemv8x1(out, weights, rows, cols, col_stride, x);
    return;
  }
  for (r=0;r<rows;r++)
  {
    out[r] = 0;
    for (c=0;c<cols;c++) {
      out[r] += weights[c*col_stride + r]*x[c];
    }
  }
}
/* Sparse float matrix-vector product on 8x4 blocks.
   For every group of 8 output rows, `idx` holds a block count followed by
   that many input positions. Each block consumes 32 weights laid out as four
   runs of 8 (one run per input pos+0..pos+3) and adds into the 8 outputs. */
static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
{
  int row0, blk, c, k;
  OPUS_CLEAR(out, rows);
  for (row0=0;row0<rows;row0+=8)
  {
    const int nb_blocks = *idx++;
    for (blk=0;blk<nb_blocks;blk++)
    {
      float * restrict y;
      float xin[4];
      const int pos = (*idx++);
      /* Fetch the four inputs before touching the outputs. */
      xin[0] = x[pos+0];
      xin[1] = x[pos+1];
      xin[2] = x[pos+2];
      xin[3] = x[pos+3];
      y = &out[row0];
      for (c=0;c<4;c++) {
        for (k=0;k<8;k++) y[k] += w[8*c+k]*xin[c];
      }
      w += 32;
    }
  }
}
#ifdef USE_SU_BIAS
/* Sparse int8 matrix-vector product on 8x4 blocks (USE_SU_BIAS variant).

   The float inputs are quantized to unsigned bytes as 127 + floor(.5 + 127*_x);
   `cols` must not exceed MAX_INPUTS. For every group of 8 output rows, `idx`
   holds a block count followed by that many input positions; each block
   consumes 32 weights (4 per output row) and the integer dot products are
   accumulated into `out`. Finally each output is multiplied by scale[i]. */
static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
{
  int i, j, k;
  unsigned char x[MAX_INPUTS];
  for (i=0;i<rows;i++) out[i] = 0;
  /* Convert through int, as cgemv8x4() does: storing an out-of-range double
     directly into an unsigned char is undefined behaviour. */
  for (i=0;i<cols;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
  for (i=0;i<rows;i+=8)
  {
    int colblocks;
    colblocks = *idx++;
    for (j=0;j<colblocks;j++)
    {
      int pos;
      float * restrict y;
      int xj0, xj1, xj2, xj3;
      pos = (*idx++);
      xj0 = x[pos+0];
      xj1 = x[pos+1];
      xj2 = x[pos+2];
      xj3 = x[pos+3];
      y = &out[i];
      /* Row k of the block uses weights w[4k..4k+3]. */
      for (k=0;k<8;k++)
        y[k] += (w[4*k]*xj0+w[4*k+1]*xj1+w[4*k+2]*xj2+w[4*k+3]*xj3);
      w += 32;
    }
  }
  for (i=0;i<rows;i++) out[i] *= scale[i];
}
/* Dense int8 matrix-vector product on 8x4 blocks (USE_SU_BIAS variant).
   Inputs are quantized to unsigned bytes as 127 + floor(.5 + 127*_x); `cols`
   must not exceed MAX_INPUTS. Weights are consumed 32 at a time (4 per output
   row of an 8-row group), and every output is finally multiplied by its scale. */
static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
{
  int r, c, k;
  unsigned char x[MAX_INPUTS];
  for (r=0;r<rows;r++) out[r] = 0;
  for (c=0;c<cols;c++) x[c] = 127+(int)floor(.5+127*_x[c]);
  for (r=0;r<rows;r+=8)
  {
    for (c=0;c<cols;c+=4)
    {
      float *y = &out[r];
      const float xj0 = x[c+0];
      const float xj1 = x[c+1];
      const float xj2 = x[c+2];
      const float xj3 = x[c+3];
      /* Row k of the block uses weights w[4k..4k+3]. */
      for (k=0;k<8;k++)
        y[k] += (w[4*k]*xj0+w[4*k+1]*xj1+w[4*k+2]*xj2+w[4*k+3]*xj3);
      w += 32;
    }
  }
  for (r=0;r<rows;r++) out[r] *= scale[r];
}
#else
/* Sparse int8 matrix-vector product on 8x4 blocks (signed-input variant).
   Inputs are quantized to opus_int8 as floor(.5 + 127*_x); `cols` must not
   exceed MAX_INPUTS. For every group of 8 output rows, `idx` holds a block
   count followed by that many input positions; each block consumes 32 weights
   (4 per output row). Every output is finally multiplied by its scale. */
static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
{
  int r, c, blk, k;
  opus_int8 x[MAX_INPUTS];
  for (r=0;r<rows;r++) out[r] = 0;
  for (c=0;c<cols;c++) x[c] = (int)floor(.5+127*_x[c]);
  for (r=0;r<rows;r+=8)
  {
    const int colblocks = *idx++;
    for (blk=0;blk<colblocks;blk++)
    {
      float * restrict y;
      int xj0, xj1, xj2, xj3;
      const int pos = (*idx++);
      xj0 = x[pos+0];
      xj1 = x[pos+1];
      xj2 = x[pos+2];
      xj3 = x[pos+3];
      y = &out[r];
      /* Row k of the block uses weights w[4k..4k+3]. */
      for (k=0;k<8;k++)
        y[k] += (w[4*k]*xj0+w[4*k+1]*xj1+w[4*k+2]*xj2+w[4*k+3]*xj3);
      w += 32;
    }
  }
  for (r=0;r<rows;r++) out[r] *= scale[r];
}
/* Dense int8 matrix-vector product on 8x4 blocks (signed-input variant).
   Inputs are quantized to opus_int8 as floor(.5 + 127*_x); `cols` must not
   exceed MAX_INPUTS. Weights are consumed 32 at a time (4 per output row of
   an 8-row group), and every output is finally multiplied by its scale. */
static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
{
  int r, c, k;
  opus_int8 x[MAX_INPUTS];
  for (r=0;r<rows;r++) out[r] = 0;
  for (c=0;c<cols;c++) x[c] = (int)floor(.5+127*_x[c]);
  for (r=0;r<rows;r+=8)
  {
    for (c=0;c<cols;c+=4)
    {
      float *y = &out[r];
      const float xj0 = x[c+0];
      const float xj1 = x[c+1];
      const float xj2 = x[c+2];
      const float xj3 = x[c+3];
      /* Row k of the block uses weights w[4k..4k+3]. */
      for (k=0;k<8;k++)
        y[k] += (w[4*k]*xj0+w[4*k+1]*xj1+w[4*k+2]*xj2+w[4*k+3]*xj3);
      w += 32;
    }
  }
  for (r=0;r<rows;r++) out[r] *= scale[r];
}
#endif
/* No AVX2/FMA support */
#ifndef LPCNET_TEST
static inline float lpcnet_exp2(float x)
{
int integer;
float frac;
union {
float f;
opus_uint32 i;
} res;
integer = floor(x);
if (integer < -50)
return 0;
frac = x-integer;
/* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
res.f = 0.99992522f + frac * (0.69583354f
+ frac * (0.22606716f + 0.078024523f*frac));
res.i = (res.i + (integer<<23)) & 0x7fffffff;
return res.f;
}
/* exp(x) computed as 2^(x*log2(e)); 1.44269504f approximates log2(e). */
#define lpcnet_exp(x) lpcnet_exp2((x)*1.44269504f)
/* Multiply-add written as a plain product followed by a sum: a*b + c. */
#define fmadd(a, b, c) ((a)*(b)+(c))
/* Rational approximation of tanh():
     x * (N0 + N1*x^2 + N2*x^4) / (D0 + D1*x^2 + D2*x^4)
   with the result clamped to [-1, 1]. */
static OPUS_INLINE float tanh_approx(float x)
{
  const float num_c0 = 952.52801514f;
  const float num_c1 = 96.39235687f;
  const float num_c2 = 0.60863042f;
  const float den_c0 = 952.72399902f;
  const float den_c1 = 413.36801147f;
  const float den_c2 = 11.88600922f;
  float x_sq, numer, denom, ratio;
  x_sq = x*x;
  /* Both polynomials are evaluated in Horner form on x^2. */
  numer = fmadd(fmadd(num_c2, x_sq, num_c1), x_sq, num_c0);
  denom = fmadd(fmadd(den_c2, x_sq, den_c1), x_sq, den_c0);
  ratio = numer*x/denom;
  return MAX32(-1.f, MIN32(1.f, ratio));
}
/* Sigmoid expressed through tanh: 0.5 + 0.5*tanh(x/2). */
static inline float sigmoid_approx(float x)
{
  float t = tanh_approx(.5f*x);
  return .5f + .5f*t;
}
/* Writes lpcnet_exp(x[i]) to y[i] for the N entries.
   Note: only the exponentials are computed here; no normalization by their
   sum takes place in this function. */
static inline void softmax(float *y, const float *x, int N)
{
  int k = 0;
  while (k < N) {
    y[k] = lpcnet_exp(x[k]);
    k++;
  }
}
/* Element-wise tanh_approx() over N values: y[i] = tanh_approx(x[i]). */
static inline void vec_tanh(float *y, const float *x, int N)
{
  int k = 0;
  while (k < N) {
    y[k] = tanh_approx(x[k]);
    k++;
  }
}
/* Element-wise sigmoid_approx() over N values: y[i] = sigmoid_approx(x[i]). */
static inline void vec_sigmoid(float *y, const float *x, int N)
{
  int k = 0;
  while (k < N) {
    y[k] = sigmoid_approx(x[k]);
    k++;
  }
}
#endif
/* SCALE is 128*127 (= 16256) and SCALE_1 is its reciprocal. */
#define SCALE (128.f*127.f)
#define SCALE_1 (1.f/128.f/127.f)
#endif /*no optimizations*/
#endif /*VEC_H*/

884
src/libs/opus/dnn/vec_avx.h Normal file
View File

@ -0,0 +1,884 @@
/* Copyright (c) 2018 Mozilla
2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
AVX implementation of vector operations, compile with -mavx
AVX2/FMA implementation of vector operations, compile with -mavx2 -mfma
*/
#ifndef VEC_AVX_H
#define VEC_AVX_H
#include <immintrin.h>
#include <math.h>
#include "celt/x86/x86cpu.h"
#define MAX_INPUTS (2048)
#define USE_SU_BIAS
/* NOTE(review): GCC and Clang spell the SSE4.1 feature macro __SSE4_1__; with
   the spelling tested below this guard presumably always selects the
   emulation -- confirm before changing, since it affects numerics. */
#ifndef __SSE_4_1__
/* Floor emulation: subtract 0.5, convert to int32 with _mm_cvtps_epi32(), and
   convert back to float.
   NOTE(review): the conversion rounds according to the current rounding mode,
   so the result for an exact integer input may differ from a true floor --
   confirm callers tolerate this. */
static inline __m128 mm_floor_ps(__m128 x) {
__m128 half = _mm_set1_ps(0.5);
return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_sub_ps(x, half)));
}
#undef _mm_floor_ps
#define _mm_floor_ps(x) mm_floor_ps(x)
#endif
/* If we don't have AVX available, emulate what we need with SSE up to 4.1. */
#ifndef __AVX__
/* Stand-in for __m256 when AVX is unavailable: two 128-bit float vectors,
   `lo` for elements 0..3 and `hi` for elements 4..7. */
typedef struct {
__m128 lo;
__m128 hi;
} mm256_emu;
#define __m256 mm256_emu
/* Unaligned load of 8 floats: src[0..3] into lo, src[4..7] into hi. */
static inline mm256_emu mm256_loadu_ps(const float *src) {
  mm256_emu v;
  v.lo = _mm_loadu_ps(src);
  v.hi = _mm_loadu_ps(src + 4);
  return v;
}
#define _mm256_loadu_ps(src) mm256_loadu_ps(src)
/* Unaligned store of 8 floats: lo to dst[0..3], hi to dst[4..7]. */
static inline void mm256_storeu_ps(float *dst, mm256_emu src) {
  float *upper = dst + 4;
  _mm_storeu_ps(dst, src.lo);
  _mm_storeu_ps(upper, src.hi);
}
#define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
/* All-zero 8-float vector. */
static inline mm256_emu mm256_setzero_ps(void) {
  mm256_emu zero;
  zero.hi = zero.lo = _mm_setzero_ps();
  return zero;
}
#define _mm256_setzero_ps mm256_setzero_ps
/* Replicate the float pointed to by x into all 8 elements. */
static inline mm256_emu mm256_broadcast_ss(const float *x) {
  mm256_emu v;
  v.hi = v.lo = _mm_set1_ps(*x);
  return v;
}
#define _mm256_broadcast_ss(x) mm256_broadcast_ss(x)
/* Replicate the value x into all 8 elements. */
static inline mm256_emu mm256_set1_ps(float x) {
  mm256_emu v;
  v.hi = v.lo = _mm_set1_ps(x);
  return v;
}
#define _mm256_set1_ps(x) mm256_set1_ps(x)
/* Element-wise product, computed separately on each 128-bit half. */
static inline mm256_emu mm256_mul_ps(mm256_emu a, mm256_emu b) {
  mm256_emu prod;
  prod.hi = _mm_mul_ps(a.hi, b.hi);
  prod.lo = _mm_mul_ps(a.lo, b.lo);
  return prod;
}
#define _mm256_mul_ps(a,b) mm256_mul_ps(a,b)
/* Element-wise sum, computed separately on each 128-bit half. */
static inline mm256_emu mm256_add_ps(mm256_emu a, mm256_emu b) {
  mm256_emu sum;
  sum.hi = _mm_add_ps(a.hi, b.hi);
  sum.lo = _mm_add_ps(a.lo, b.lo);
  return sum;
}
#define _mm256_add_ps(a,b) mm256_add_ps(a,b)
/* Element-wise maximum via _mm_max_ps() on each 128-bit half. */
static inline mm256_emu mm256_max_ps(mm256_emu a, mm256_emu b) {
  mm256_emu larger;
  larger.hi = _mm_max_ps(a.hi, b.hi);
  larger.lo = _mm_max_ps(a.lo, b.lo);
  return larger;
}
#define _mm256_max_ps(a,b) mm256_max_ps(a,b)
/* Element-wise minimum via _mm_min_ps() on each 128-bit half. */
static inline mm256_emu mm256_min_ps(mm256_emu a, mm256_emu b) {
  mm256_emu smaller;
  smaller.hi = _mm_min_ps(a.hi, b.hi);
  smaller.lo = _mm_min_ps(a.lo, b.lo);
  return smaller;
}
#define _mm256_min_ps(a,b) mm256_min_ps(a,b)
/* Element-wise approximate reciprocal via _mm_rcp_ps() on each 128-bit half. */
static inline mm256_emu mm256_rcp_ps(mm256_emu a) {
  mm256_emu inv;
  inv.hi = _mm_rcp_ps(a.hi);
  inv.lo = _mm_rcp_ps(a.lo);
  return inv;
}
#define _mm256_rcp_ps(a) mm256_rcp_ps(a)
/* Return one 128-bit half: lo when i is 0, hi for any other i. */
static inline __m128 mm256_extractf128_ps(mm256_emu x, int i) {
  if (i==0) return x.lo;
  return x.hi;
}
#undef _mm256_extractf128_ps
#define _mm256_extractf128_ps(x,i) mm256_extractf128_ps(x,i)
/* Return dst with one 128-bit half replaced by src: lo when i is 0,
   hi for any other i. */
static inline mm256_emu mm256_insertf128_ps(mm256_emu dst, __m128 src, int i) {
  if (i!=0) {
    dst.hi = src;
  } else {
    dst.lo = src;
  }
  return dst;
}
#undef _mm256_insertf128_ps
#define _mm256_insertf128_ps(dst,src,i) mm256_insertf128_ps(dst,src,i)
#endif /* __AVX__ */
/* If we don't have AVX2 available, emulate what we need with SSE up to 4.1. */
#ifndef __AVX2__
/* Stand-in for __m256i when AVX2 is unavailable: two 128-bit integer vectors
   holding the low and high halves. */
typedef struct {
__m128i lo;
__m128i hi;
} mm256i_emu;
/* Keeps the compiler's own __m256i reachable under another name before the
   macro below redirects __m256i to the emulated type. */
typedef __m256i real_m256i;
#define __m256i mm256i_emu
/* All-zero 256-bit integer vector. */
static inline mm256i_emu mm256_setzero_si256(void) {
  mm256i_emu zero;
  zero.hi = zero.lo = _mm_setzero_si128();
  return zero;
}
#define _mm256_setzero_si256 mm256_setzero_si256
/* Unaligned 256-bit integer load, performed as two consecutive 128-bit loads. */
static inline mm256i_emu mm256_loadu_si256(const mm256i_emu *src) {
  const __m128i *halves = (const __m128i*)src;
  mm256i_emu v;
  v.lo = _mm_loadu_si128(&halves[0]);
  v.hi = _mm_loadu_si128(&halves[1]);
  return v;
}
#define _mm256_loadu_si256(src) mm256_loadu_si256(src)
/* Unaligned 256-bit integer store, performed as two consecutive 128-bit stores. */
static inline void mm256_storeu_si256(mm256i_emu *dst, mm256i_emu src) {
  __m128i *halves = (__m128i*)dst;
  _mm_storeu_si128(&halves[0], src.lo);
  _mm_storeu_si128(&halves[1], src.hi);
}
#define _mm256_storeu_si256(dst, src) mm256_storeu_si256(dst, src)
/* Replicate the lowest 32-bit element of x into all 8 positions. */
static inline mm256i_emu mm256_broadcastd_epi32(__m128i x) {
  mm256i_emu v;
  v.lo = _mm_shuffle_epi32(x, 0);
  v.hi = v.lo;
  return v;
}
#define _mm256_broadcastd_epi32(x) mm256_broadcastd_epi32(x)
/* Replicate the 32-bit value x into all 8 positions. */
static inline mm256i_emu mm256_set1_epi32(int x) {
  mm256i_emu v;
  v.hi = v.lo = _mm_set1_epi32(x);
  return v;
}
#define _mm256_set1_epi32(x) mm256_set1_epi32(x)
/* Replicate the 16-bit value x into all 16 positions. */
static inline mm256i_emu mm256_set1_epi16(int x) {
  mm256i_emu v;
  v.hi = v.lo = _mm_set1_epi16(x);
  return v;
}
#define _mm256_set1_epi16(x) mm256_set1_epi16(x)
/* Element-wise 32-bit integer sum, computed separately on each 128-bit half. */
static inline mm256i_emu mm256_add_epi32(mm256i_emu a, mm256i_emu b) {
  mm256i_emu sum;
  sum.hi = _mm_add_epi32(a.hi, b.hi);
  sum.lo = _mm_add_epi32(a.lo, b.lo);
  return sum;
}
#define _mm256_add_epi32(a,b) mm256_add_epi32(a,b)
/* _mm_madd_epi16() applied to each 128-bit half. */
static inline mm256i_emu mm256_madd_epi16(mm256i_emu a, mm256i_emu b) {
  mm256i_emu acc;
  acc.hi = _mm_madd_epi16(a.hi, b.hi);
  acc.lo = _mm_madd_epi16(a.lo, b.lo);
  return acc;
}
#define _mm256_madd_epi16(a,b) mm256_madd_epi16(a,b)
/* _mm_maddubs_epi16() applied to each 128-bit half. */
static inline mm256i_emu mm256_maddubs_epi16(mm256i_emu a, mm256i_emu b) {
  mm256i_emu acc;
  acc.hi = _mm_maddubs_epi16(a.hi, b.hi);
  acc.lo = _mm_maddubs_epi16(a.lo, b.lo);
  return acc;
}
#define _mm256_maddubs_epi16(a,b) mm256_maddubs_epi16(a,b)
/* Emulating the conversion functions is tricky because they use __m256i but are defined in AVX.
So we need to make a special case when only AVX is available. */
#ifdef __AVX__
/* Overlays the emulated integer vector with the compiler's real 256-bit
   integer type so a value can be handed to the genuine AVX conversions. */
typedef union {
mm256i_emu fake;
real_m256i real;
} mm256_union;
/* AVX-only build: reinterpret the emulated integer vector as a real 256-bit
   integer vector through the union, then use the genuine conversion. */
static inline __m256 mm256_cvtepi32_ps(mm256i_emu a) {
  mm256_union bits;
  __m256 converted;
  bits.fake = a;
  converted = _mm256_cvtepi32_ps(bits.real);
  return converted;
}
#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
/* AVX-only build: run the genuine float-to-int32 conversion and hand the
   result back as the emulated integer vector through the union. */
static inline mm256i_emu mm256_cvtps_epi32(__m256 a) {
  mm256_union bits;
  bits.real = _mm256_cvtps_epi32(a);
  return bits.fake;
}
#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
#else
/* SSE-only build: int32-to-float conversion applied to each 128-bit half. */
static inline mm256_emu mm256_cvtepi32_ps(mm256i_emu a) {
  mm256_emu converted;
  converted.hi = _mm_cvtepi32_ps(a.hi);
  converted.lo = _mm_cvtepi32_ps(a.lo);
  return converted;
}
#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
/* SSE-only build: float-to-int32 conversion applied to each 128-bit half. */
static inline mm256i_emu mm256_cvtps_epi32(mm256_emu a) {
  mm256i_emu converted;
  converted.hi = _mm_cvtps_epi32(a.hi);
  converted.lo = _mm_cvtps_epi32(a.lo);
  return converted;
}
#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
#endif /* __AVX__ */
#endif /* __AVX2__ */
/* In case we don't have FMA, make it a mul and an add. */
#if !(defined(__FMA__) && defined(__AVX__))
#define _mm256_fmadd_ps(a,b,c) _mm256_add_ps(_mm256_mul_ps(a, b), c)
#define _mm_fmadd_ps(a,b,c) _mm_add_ps(_mm_mul_ps(a, b), c)
#endif
#ifdef __AVX2__
/* Approximate exp() on 8 floats (AVX2 path).
   The input is multiplied by log2(e), clamped to [-50, 50] and split into its
   floor and a fractional part; a cubic polynomial (K0..K3) is evaluated on the
   fractional part, and the floor, shifted left by 23, is added to the
   polynomial's bit pattern. */
static inline __m256 exp8_approx(__m256 X)
{
const __m256 K0 = _mm256_set1_ps(0.99992522f);
const __m256 K1 = _mm256_set1_ps(0.69583354f);
const __m256 K2 = _mm256_set1_ps(0.22606716f);
const __m256 K3 = _mm256_set1_ps(0.078024523f);
const __m256 log2_E = _mm256_set1_ps(1.44269504f);
const __m256 max_in = _mm256_set1_ps(50.f);
const __m256 min_in = _mm256_set1_ps(-50.f);
__m256 XF, Y;
__m256i I;
/* Work in base 2 and bound the exponent. */
X = _mm256_mul_ps(X, log2_E);
X = _mm256_max_ps(min_in, _mm256_min_ps(max_in, X));
/* XF/I: integer part; X becomes the fractional remainder. */
XF = _mm256_floor_ps(X);
I = _mm256_cvtps_epi32(XF);
X = _mm256_sub_ps(X, XF);
/* Horner evaluation of K0 + K1*X + K2*X^2 + K3*X^3. */
Y = _mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_fmadd_ps(K3, X, K2), X, K1), X, K0);
/* Add the integer part at bit 23 of the float's bit pattern. */
I = _mm256_slli_epi32(I, 23);
Y = _mm256_castsi256_ps(_mm256_add_epi32(I, _mm256_castps_si256(Y)));
return Y;
}
/* Quantize floats to unsigned bytes as 127*_x + 127 (AVX2 path), 8 inputs per
   iteration.
   NOTE(review): every iteration loads 8 floats from &_x[i] and stores a full
   256-bit vector at &x[i] while i advances by 8, so the last iteration reads
   and writes beyond `len` elements unless the buffers have slack -- confirm
   that callers provide it. */
static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
int i;
__m256 const127 = _mm256_set1_ps(127.f);
for (i=0;i<len;i+=8) {
__m256 xf;
__m256i xi;
xf = _mm256_loadu_ps(&_x[i]);
/* xf = xf*127 + 127, then convert to 32-bit integers. */
xf = _mm256_fmadd_ps(xf, const127, const127);
xi = _mm256_cvtps_epi32(xf);
/* Narrow 32 -> 16 -> 8 bits with unsigned saturation; the permutes gather
   the packed results into the low part of the vector. */
xi = _mm256_packus_epi32(xi, _mm256_setzero_si256());
xi = _mm256_permute4x64_epi64(xi, 0xD8);
xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
xi = _mm256_permutevar8x32_epi32(xi, _mm256_setr_epi32(0,1, 0,0, 0,0, 0,0));
_mm256_storeu_si256 ((__m256i *)(void*)&x[i], xi);
}
}
#else
/* Approximate exp() on 4 floats (non-AVX2 path).
   The input is multiplied by log2(e), clamped to [-50, 50] and split into an
   integer part (via _mm_floor_ps) and a remainder; a cubic polynomial
   (K0..K3) is evaluated on the remainder, the integer part shifted left by 23
   is added to the polynomial's bit pattern, and the sign bit is masked off. */
static inline __m128 exp4_approx(__m128 X)
{
const __m128 K0 = _mm_set1_ps(0.99992522f);
const __m128 K1 = _mm_set1_ps(0.69583354f);
const __m128 K2 = _mm_set1_ps(0.22606716f);
const __m128 K3 = _mm_set1_ps(0.078024523f);
const __m128 log2_E = _mm_set1_ps(1.44269504);
const __m128 max_in = _mm_set1_ps(50.f);
const __m128 min_in = _mm_set1_ps(-50.f);
const __m128i mask = _mm_set1_epi32(0x7fffffff);
__m128 XF, Y;
__m128i I;
/* Work in base 2 and bound the exponent. */
X = _mm_mul_ps(X, log2_E);
X = _mm_max_ps(min_in, _mm_min_ps(max_in, X));
/* XF/I: integer part; X becomes the remainder. */
XF = _mm_floor_ps(X);
I = _mm_cvtps_epi32(XF);
X = _mm_sub_ps(X, XF);
/* Horner evaluation of K0 + K1*X + K2*X^2 + K3*X^3. */
Y = _mm_fmadd_ps(_mm_fmadd_ps(_mm_fmadd_ps(K3, X, K2), X, K1), X, K0);
/* Add the integer part at bit 23 and clear the sign bit. */
I = _mm_slli_epi32(I, 23);
Y = _mm_castsi128_ps(_mm_and_si128(mask, _mm_add_epi32(I, _mm_castps_si128(Y))));
return Y;
}
/* Approximate exp() on 8 floats without AVX2: split the vector into its two
   128-bit halves, run exp4_approx() on each, and reassemble. */
static inline __m256 exp8_approx(__m256 X)
{
  __m128 lo_in, hi_in, lo_out, hi_out;
  __m256 result;
  lo_in = _mm256_extractf128_ps(X, 0);
  hi_in = _mm256_extractf128_ps(X, 1);
  lo_out = exp4_approx(lo_in);
  hi_out = exp4_approx(hi_in);
  result = _mm256_insertf128_ps(_mm256_setzero_ps(), lo_out, 0);
  result = _mm256_insertf128_ps(result, hi_out, 1);
  return result;
}
static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
int i;
for (i=0;i<len;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
}
#endif
#ifdef __AVX__
/* Approximating tanh() using a Padé-like rational function:
tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
subject to the +/- 1 bounds.
The coefficients were determined by gradient descent trying to minimize
the maximum deviation over the whole range (this is only possible because
of the bounds). The max error is around 3e-4 and is dominated by the
reciprocal approximation (the max error of the rational function is
around 6e-5).
*/
/* 8-wide tanh approximation: X * N(X^2) / D(X^2), clamped to [-1, 1].
   The division is done by multiplying with _mm256_rcp_ps(den). */
static inline __m256 tanh8_approx(__m256 X)
{
const __m256 N0 = _mm256_set1_ps(952.52801514f);
const __m256 N1 = _mm256_set1_ps(96.39235687f);
const __m256 N2 = _mm256_set1_ps(0.60863042f);
const __m256 D0 = _mm256_set1_ps(952.72399902f);
const __m256 D1 = _mm256_set1_ps(413.36801147f);
const __m256 D2 = _mm256_set1_ps(11.88600922f);
const __m256 max_out = _mm256_set1_ps(1.f);
const __m256 min_out = _mm256_set1_ps(-1.f);
__m256 X2, num, den;
X2 = _mm256_mul_ps(X, X);
/* Horner evaluation of both polynomials in X^2. */
num = _mm256_fmadd_ps(_mm256_fmadd_ps(N2, X2, N1), X2, N0);
den = _mm256_fmadd_ps(_mm256_fmadd_ps(D2, X2, D1), X2, D0);
num = _mm256_mul_ps(num, X);
/* Approximate reciprocal instead of a division. */
den = _mm256_rcp_ps(den);
num = _mm256_mul_ps(num, den);
return _mm256_max_ps(min_out, _mm256_min_ps(max_out, num));
}
/* Sigmoid approximation using a Padé-like rational function:
1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
subject to the [0, 1] bounds.
The coefficients are directly derived by dividing the tanh() coefficients
by powers of two to get the correct scaling. The max error is around 1.5e-4
and is dominated by the reciprocal approximation (the max error of the
rational function is around 3e-5).
*/
/* 8-wide sigmoid approximation: 0.5 + X * N(X^2) / D(X^2), clamped to [0, 1].
   The division is done by multiplying with _mm256_rcp_ps(den). */
static inline __m256 sigmoid8_approx(__m256 X)
{
const __m256 N0 = _mm256_set1_ps(238.13200378f);
const __m256 N1 = _mm256_set1_ps(6.02452230f);
const __m256 N2 = _mm256_set1_ps(0.00950985f);
const __m256 D0 = _mm256_set1_ps(952.72399902f);
const __m256 D1 = _mm256_set1_ps(103.34200287f);
const __m256 D2 = _mm256_set1_ps(0.74287558f);
const __m256 half = _mm256_set1_ps(0.5);
const __m256 max_out = _mm256_set1_ps(1.f);
const __m256 min_out = _mm256_set1_ps(0.f);
__m256 X2, num, den;
X2 = _mm256_mul_ps(X, X);
/* Horner evaluation of both polynomials in X^2. */
num = _mm256_fmadd_ps(_mm256_fmadd_ps(N2, X2, N1), X2, N0);
den = _mm256_fmadd_ps(_mm256_fmadd_ps(D2, X2, D1), X2, D0);
num = _mm256_mul_ps(num, X);
/* Approximate reciprocal instead of a division; the 0.5 offset is folded
   into the final multiply-add. */
den = _mm256_rcp_ps(den);
num = _mm256_fmadd_ps(num, den, half);
return _mm256_max_ps(min_out, _mm256_min_ps(max_out, num));
}
static inline float tanh_approx(float x)
{
float out[8];
__m256 X, Y;
X = _mm256_set1_ps(x);
Y = tanh8_approx(X);
_mm256_storeu_ps(out, Y);
return out[0];
}
static inline float sigmoid_approx(float x)
{
float out[8];
__m256 X, Y;
X = _mm256_set1_ps(x);
Y = sigmoid8_approx(X);
_mm256_storeu_ps(out, Y);
return out[0];
}
#else
/* Four-lane tanh() approximation:
       tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   clamped to [-1, 1]. The coefficients are the same as in tanh8_approx(). */
static inline __m128 tanh4_approx(__m128 X)
{
    const __m128 N0 = _mm_set1_ps(952.52801514f);
    const __m128 N1 = _mm_set1_ps(96.39235687f);
    const __m128 N2 = _mm_set1_ps(0.60863042f);
    const __m128 D0 = _mm_set1_ps(952.72399902f);
    const __m128 D1 = _mm_set1_ps(413.36801147f);
    const __m128 D2 = _mm_set1_ps(11.88600922f);
    const __m128 upper = _mm_set1_ps(1.f);
    const __m128 lower = _mm_set1_ps(-1.f);
    const __m128 Xsq = _mm_mul_ps(X, X);
    /* Both polynomials are evaluated in x^2 using Horner's scheme. */
    const __m128 poly_n = _mm_fmadd_ps(_mm_fmadd_ps(N2, Xsq, N1), Xsq, N0);
    const __m128 poly_d = _mm_fmadd_ps(_mm_fmadd_ps(D2, Xsq, D1), Xsq, D0);
    /* The division is a multiplication by the reciprocal estimate. */
    const __m128 ratio = _mm_mul_ps(_mm_mul_ps(poly_n, X), _mm_rcp_ps(poly_d));
    return _mm_max_ps(lower, _mm_min_ps(upper, ratio));
}
/* Four-lane sigmoid approximation:
       1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   clamped to [0, 1]. The coefficients are the same as in sigmoid8_approx(). */
static inline __m128 sigmoid4_approx(__m128 X)
{
    const __m128 N0 = _mm_set1_ps(238.13200378f);
    const __m128 N1 = _mm_set1_ps(6.02452230f);
    const __m128 N2 = _mm_set1_ps(0.00950985f);
    const __m128 D0 = _mm_set1_ps(952.72399902f);
    const __m128 D1 = _mm_set1_ps(103.34200287f);
    const __m128 D2 = _mm_set1_ps(0.74287558f);
    const __m128 half = _mm_set1_ps(0.5);
    const __m128 upper = _mm_set1_ps(1.f);
    const __m128 lower = _mm_set1_ps(0.f);
    const __m128 Xsq = _mm_mul_ps(X, X);
    /* Both polynomials are evaluated in x^2 using Horner's scheme. */
    const __m128 poly_n = _mm_fmadd_ps(_mm_fmadd_ps(N2, Xsq, N1), Xsq, N0);
    const __m128 poly_d = _mm_fmadd_ps(_mm_fmadd_ps(D2, Xsq, D1), Xsq, D0);
    /* x*num * (1/den) + 0.5, with 1/den taken from the reciprocal estimate. */
    const __m128 shifted = _mm_fmadd_ps(_mm_mul_ps(poly_n, X), _mm_rcp_ps(poly_d), half);
    return _mm_max_ps(lower, _mm_min_ps(upper, shifted));
}
static inline float tanh_approx(float x)
{
float out[4];
__m128 X, Y;
X = _mm_set1_ps(x);
Y = tanh4_approx(X);
_mm_storeu_ps(out, Y);
return out[0];
}
static inline float sigmoid_approx(float x)
{
float out[4];
__m128 X, Y;
X = _mm_set1_ps(x);
Y = sigmoid4_approx(X);
_mm_storeu_ps(out, Y);
return out[0];
}
#endif
static inline float lpcnet_exp(float x)
{
float out[8];
__m256 X, Y;
X = _mm256_set1_ps(x);
Y = exp8_approx(X);
_mm256_storeu_ps(out, Y);
return out[0];
}
static inline void softmax(float *y, const float *x, int N)
{
int i;
for (i=0;i<N-7;i+=8)
{
__m256 X, Y;
X = _mm256_loadu_ps(&x[i]);
Y = exp8_approx(X);
_mm256_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
y[i] = lpcnet_exp(x[i]);
}
#ifdef __AVX__
static inline void vec_tanh(float *y, const float *x, int N)
{
int i;
for (i=0;i<N-7;i+=8)
{
__m256 X, Y;
X = _mm256_loadu_ps(&x[i]);
Y = tanh8_approx(X);
_mm256_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
{
y[i] = tanh_approx(x[i]);
}
}
static inline void vec_sigmoid(float *y, const float *x, int N)
{
int i;
for (i=0;i<N-7;i+=8)
{
__m256 X, Y;
X = _mm256_loadu_ps(&x[i]);
Y = sigmoid8_approx(X);
_mm256_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
{
y[i] = sigmoid_approx(x[i]);
}
}
#else
static inline void vec_tanh(float *y, const float *x, int N)
{
int i;
for (i=0;i<N-3;i+=4)
{
__m128 X, Y;
X = _mm_loadu_ps(&x[i]);
Y = tanh4_approx(X);
_mm_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
{
y[i] = tanh_approx(x[i]);
}
}
static inline void vec_sigmoid(float *y, const float *x, int N)
{
int i;
for (i=0;i<N-3;i+=4)
{
__m128 X, Y;
X = _mm_loadu_ps(&x[i]);
Y = sigmoid4_approx(X);
_mm_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
{
y[i] = sigmoid_approx(x[i]);
}
}
#endif
#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
/* A VNNI flavour is enabled at compile time: forward the wrapper directly to
   the native _mm256_dpbusds_epi32() intrinsic. */
#define opus_mm256_dpbusds_epi32(src, a, b) _mm256_dpbusds_epi32(src, a, b)
#elif defined(__AVX2__)
/* AVX2 substitute for the VNNI byte dot product: the bytes of a (unsigned)
   are multiplied by the bytes of b (signed), each group of four products is
   summed into a 32-bit lane, and that sum is added to src.
   Unlike the native instruction, the intermediate pair sums produced by
   _mm256_maddubs_epi16() are 16-bit saturated and the final add is a plain
   (non-saturating) 32-bit add. */
static inline __m256i opus_mm256_dpbusds_epi32(__m256i src, __m256i a, __m256i b) {
    const __m256i unit = _mm256_set1_epi16(1);
    /* u8*s8 products, adjacent pairs summed into 16-bit lanes. */
    const __m256i pair_sums = _mm256_maddubs_epi16(a, b);
    /* Multiplying by 1 and pair-adding widens to one 32-bit sum per four bytes. */
    const __m256i quad_sums = _mm256_madd_epi16(pair_sums, unit);
    return _mm256_add_epi32(src, quad_sums);
}
#elif defined(__SSSE3__)
/* SSSE3 build of the byte dot-product wrapper, operating on the emulated
   256-bit integer type. Same three steps as the AVX2 version: maddubs,
   madd by a vector of 16-bit ones, then add to src.
   NOTE(review): the _mm256_* names used here are presumably the emulated
   versions provided for mm256i_emu elsewhere in this header -- confirm. */
static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
    const mm256i_emu unit = _mm256_set1_epi16(1);
    const mm256i_emu pair_sums = _mm256_maddubs_epi16(a, b);
    const mm256i_emu quad_sums = _mm256_madd_epi16(pair_sums, unit);
    return _mm256_add_epi32(src, quad_sums);
}
#elif defined(__SSE2__)
/* SSE2-only byte dot product: for every 32-bit lane, the four bytes of a
   (taken as unsigned) are multiplied by the four bytes of b (taken as signed),
   the products are summed and the sum is added to the matching lane of src.
   All additions are plain 32-bit adds. */
static inline __m128i mm_dpbusds_epi32(__m128i src, __m128i a, __m128i b) {
    /* Upper byte of each 16-bit lane: zero-extended for a, sign-extended for b. */
    const __m128i a_upper = _mm_srli_epi16(a, 8);
    const __m128i b_upper = _mm_srai_epi16(b, 8);
    /* Lower byte of each 16-bit lane: shift it to the top, then bring it back
       down with the appropriate (logical/arithmetic) shift. */
    const __m128i a_lower = _mm_srli_epi16(_mm_slli_epi16(a, 8), 8);
    const __m128i b_lower = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8);
    /* Each madd produces the sum of two 16x16 products per 32-bit lane. */
    const __m128i dot_upper = _mm_madd_epi16(a_upper, b_upper);
    const __m128i dot_lower = _mm_madd_epi16(a_lower, b_lower);
    return _mm_add_epi32(src, _mm_add_epi32(dot_upper, dot_lower));
}
/* SSE2 build of the 256-bit byte dot-product wrapper: the emulated vector is
   processed as two independent 128-bit halves with mm_dpbusds_epi32(). */
static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
    mm256i_emu halves;
    halves.hi = mm_dpbusds_epi32(src.hi, a.hi, b.hi);
    halves.lo = mm_dpbusds_epi32(src.lo, a.lo, b.lo);
    return halves;
}
#else
#error "No optimizations in vec_avx.h. This should never happen. "
#endif
/* Float matrix-vector product: out[i] = sum over j of weights[j*col_stride + i]*x[j]
   for i in [0, rows) and j in [0, cols).
   Rows are consumed in chunks of 16, then 8, then 4, and finally one at a
   time, so any value of rows is handled. */
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
    int r, c;
    r = 0;
    /* 16 rows per pass, held in two 8-float accumulators. */
    while (r < rows-15)
    {
        __m256 acc_lo = _mm256_setzero_ps();
        __m256 acc_hi = _mm256_setzero_ps();
        for (c=0;c<cols;c++)
        {
            const float *col = &weights[c*col_stride + r];
            const __m256 xc = _mm256_broadcast_ss(&x[c]);
            acc_lo = _mm256_fmadd_ps(_mm256_loadu_ps(col), xc, acc_lo);
            acc_hi = _mm256_fmadd_ps(_mm256_loadu_ps(col + 8), xc, acc_hi);
        }
        _mm256_storeu_ps(&out[r], acc_lo);
        _mm256_storeu_ps(&out[r + 8], acc_hi);
        r += 16;
    }
    /* 8 rows per pass. */
    while (r < rows-7)
    {
        __m256 acc = _mm256_setzero_ps();
        for (c=0;c<cols;c++)
        {
            const __m256 xc = _mm256_broadcast_ss(&x[c]);
            acc = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[c*col_stride + r]), xc, acc);
        }
        _mm256_storeu_ps(&out[r], acc);
        r += 8;
    }
    /* 4 rows per pass, using 128-bit vectors. */
    while (r < rows-3)
    {
        __m128 acc = _mm_setzero_ps();
        for (c=0;c<cols;c++)
        {
            const __m128 xc = _mm_set1_ps(x[c]);
            acc = _mm_fmadd_ps(_mm_loadu_ps(&weights[c*col_stride + r]), xc, acc);
        }
        _mm_storeu_ps(&out[r], acc);
        r += 4;
    }
    /* Scalar fallback for the last (up to three) rows. */
    while (r < rows)
    {
        out[r] = 0;
        for (c=0;c<cols;c++) out[r] += weights[c*col_stride + r]*x[c];
        r++;
    }
}
/* Block-sparse float matrix-vector product with 8x4 blocks.
   For each group of 8 output rows, idx supplies a block count followed by
   one input offset per block. Each block consumes 32 consecutive weights:
   four runs of 8, one per input x[offset+0..3], accumulated into the 8
   outputs. rows is stepped by 8 with no tail handling. */
static inline void sparse_sgemv8x4(float *out, const float *weights, const int *idx, int rows, const float *x)
{
    int row, blk;
    for (row=0;row<rows;row+=8)
    {
        __m256 acc = _mm256_setzero_ps();
        const int nblocks = *idx++;
        for (blk=0;blk<nblocks;blk++)
        {
            const float *xin = &x[*idx++];
            acc = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[0]), _mm256_broadcast_ss(&xin[0]), acc);
            acc = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[8]), _mm256_broadcast_ss(&xin[1]), acc);
            acc = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[16]), _mm256_broadcast_ss(&xin[2]), acc);
            acc = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[24]), _mm256_broadcast_ss(&xin[3]), acc);
            weights += 32;
        }
        _mm256_storeu_ps(&out[row], acc);
    }
}
/* Block-sparse int8 matrix-vector product with 8x4 blocks.
   The float input _x (cols values) is first converted to bytes by
   vector_ps_to_epi8(); per the original note this corresponds to
   x[i] = 127+floor(.5+127*_x[i]). For each group of 8 output rows, idx supplies
   a block count followed by one input offset per block; every block uses 32
   weight bytes and the 4 input bytes at that offset. The integer sums are
   converted to float and multiplied by scale[] before being stored.
   rows is stepped by 8 with no tail handling. */
static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
{
    int row, blk;
    unsigned char xq[MAX_INPUTS];
    vector_ps_to_epi8(xq, _x, cols);
    for (row=0;row<rows;row+=8)
    {
        int nblocks;
        __m256i acc;
        __m256 result;
        nblocks = *idx++;
        acc = _mm256_setzero_si256();
        blk = 0;
#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
        while (blk < nblocks-3)
        {
            __m256i xb;
            __m256i wb;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[*idx++]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[*idx++]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[*idx++]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[*idx++]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            blk += 4;
        }
#endif
        /* Blocks not covered by the unrolled loop. */
        while (blk < nblocks)
        {
            __m256i xb;
            __m256i wb;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[*idx++]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            blk++;
        }
        /* Back to float, then apply the per-row scale. */
        result = _mm256_cvtepi32_ps(acc);
        result = _mm256_mul_ps(result, _mm256_loadu_ps(&scale[row]));
        _mm256_storeu_ps(&_out[row], result);
    }
}
/* Dense int8 matrix-vector product with 8x4 blocks.
   The float input _x (cols values) is first converted to bytes by
   vector_ps_to_epi8(); per the original note this corresponds to
   x[i] = 127+floor(.5+127*_x[i]). For each group of 8 output rows the weights
   are read sequentially, 32 bytes for every 4 input columns. The integer
   sums are converted to float and multiplied by scale[] before being stored.
   rows is stepped by 8 and cols by 4, with no tail handling for either. */
static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
{
    int row, col;
    unsigned char xq[MAX_INPUTS];
    vector_ps_to_epi8(xq, _x, cols);
    for (row=0;row<rows;row+=8)
    {
        __m256i acc;
        __m256 result;
        acc = _mm256_setzero_si256();
        col = 0;
#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
        while (col < cols-12)
        {
            __m256i xb;
            __m256i wb;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[col]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[col+4]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[col+8]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[col+12]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            col += 16;
        }
#endif
        /* Column groups not covered by the unrolled loop, four columns at a time. */
        while (col < cols)
        {
            __m256i xb;
            __m256i wb;
            xb = _mm256_broadcastd_epi32(_mm_loadu_si32(&xq[col]));
            wb = _mm256_loadu_si256((const __m256i *)(void*)w);
            acc = opus_mm256_dpbusds_epi32(acc, xb, wb);
            w += 32;
            col += 4;
        }
        /* Back to float, then apply the per-row scale. */
        result = _mm256_cvtepi32_ps(acc);
        result = _mm256_mul_ps(result, _mm256_loadu_ps(&scale[row]));
        _mm256_storeu_ps(&_out[row], result);
    }
}
/* Scaling constant (128*127) and its reciprocal.
   NOTE(review): their users are not visible in this part of the file;
   presumably they relate to the int8 quantization of weights and inputs --
   confirm. */
#define SCALE (128.f*127.f)
#define SCALE_1 (1.f/128.f/127.f)
/* NOTE(review): presumably tells the rest of the DNN code to use the bias
   variant that matches the unsigned-input byte dot product above -- confirm
   against the code that tests this macro. */
#define USE_SU_BIAS
#endif /*VEC_AVX_H*/

View File

@ -0,0 +1,473 @@
/* Copyright (c) 2018 David Rowe
2018 Mozilla
2008-2011 Octasic Inc.
2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* NEON support for ARM machines */
#ifndef VEC_NEON_H
#define VEC_NEON_H
#include <arm_neon.h>
#include "os_support.h"
#if defined(__arm__) && !defined(__aarch64__)
/* Emulate vcvtnq_s32_f32() for ARMv7 Neon: convert to fixed point with
   8 fractional bits, then use a rounding right shift by 8 to get integers. */
static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
    const int32x4_t q8 = vcvtq_n_s32_f32(x, 8);
    return vrshrq_n_s32(q8, 8);
}
/* ARMv7 stand-in for vpaddq_s16(): pairwise sums of a fill the low half of
   the result, pairwise sums of b fill the high half. */
static OPUS_INLINE int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
    const int16x4_t sums_a = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
    const int16x4_t sums_b = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
    return vcombine_s16(sums_a, sums_b);
}
/* ARMv7 stand-in for vmull_high_s8(): widening multiply of the upper eight
   bytes of a and b. */
static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) {
    const int8x8_t a_top = vget_high_s8(a);
    const int8x8_t b_top = vget_high_s8(b);
    return vmull_s8(a_top, b_top);
}
#endif
#ifdef __ARM_FEATURE_FMA
/* If we can, force the compiler to use an FMA instruction rather than break
vmlaq_f32() into fmul/fadd. With __ARM_FEATURE_FMA defined, every later
vmlaq_f32() call in this header therefore expands to vfmaq_f32(). */
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
#endif
#ifndef LPCNET_TEST
/* Four-lane approximate exp(x).
   The input is clamped to [-88, 88], rewritten as a power of two
   (x/log(2), with the exponent bias of 127 already added), and split into
   an integer part and a fractional part. The fractional part goes through
   a cubic polynomial; the integer part is shifted into the float exponent
   field to form 2^i, and the two are multiplied. */
static inline float32x4_t exp4_approx(float32x4_t x) {
    const float32x4_t K0 = vdupq_n_f32(0.99992522f);
    const float32x4_t K1 = vdupq_n_f32(0.69583354f);
    const float32x4_t K2 = vdupq_n_f32(0.22606716f);
    const float32x4_t K3 = vdupq_n_f32(0.078024523f);
    int32x4_t ipart;
    float32x4_t frac, poly, pow2;
    x = vmaxq_f32(vminq_f32(x, vdupq_n_f32(88.f)), vdupq_n_f32(-88.f));
    /* 127 + x/log(2): the bias is added now so the shift below gives 2^i directly. */
    x = vmlaq_f32(vdupq_n_f32(127.f), x, vdupq_n_f32(1.44269504f));
    /* Integer and fractional parts. */
    ipart = vcvtq_s32_f32(x);
    frac = vsubq_f32(x, vcvtq_f32_s32(ipart));
    /* K0 + frac*(K1 + frac*(K2 + K3*frac)) */
    poly = vmlaq_f32(K0, frac, vmlaq_f32(K1, frac, vmlaq_f32(K2, K3, frac)));
    /* Place the biased integer part in the exponent bits. */
    pow2 = vreinterpretq_f32_s32(vshlq_n_s32(ipart, 23));
    return vmulq_f32(poly, pow2);
}
/* Four-lane tanh() approximation:
       tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   clamped to [-1, 1]. The division is replaced by a multiplication with
   the vrecpeq_f32() reciprocal estimate. */
static inline float32x4_t tanh4_approx(float32x4_t X)
{
    const float32x4_t N0 = vdupq_n_f32(952.52801514f);
    const float32x4_t N1 = vdupq_n_f32(96.39235687f);
    const float32x4_t N2 = vdupq_n_f32(0.60863042f);
    const float32x4_t D0 = vdupq_n_f32(952.72399902f);
    const float32x4_t D1 = vdupq_n_f32(413.36801147f);
    const float32x4_t D2 = vdupq_n_f32(11.88600922f);
    const float32x4_t upper = vdupq_n_f32(1.f);
    const float32x4_t lower = vdupq_n_f32(-1.f);
    const float32x4_t Xsq = vmulq_f32(X, X);
    /* Both polynomials are evaluated in x^2 using Horner's scheme. */
    const float32x4_t poly_n = vmlaq_f32(N0, Xsq, vmlaq_f32(N1, N2, Xsq));
    const float32x4_t poly_d = vmlaq_f32(D0, Xsq, vmlaq_f32(D1, D2, Xsq));
    const float32x4_t ratio = vmulq_f32(vmulq_f32(poly_n, X), vrecpeq_f32(poly_d));
    return vmaxq_f32(lower, vminq_f32(upper, ratio));
}
/* Four-lane sigmoid approximation:
       1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   clamped to [0, 1]. The division is replaced by a multiplication with
   the vrecpeq_f32() reciprocal estimate. */
static inline float32x4_t sigmoid4_approx(float32x4_t X)
{
    const float32x4_t N0 = vdupq_n_f32(238.13200378f);
    const float32x4_t N1 = vdupq_n_f32(6.02452230f);
    const float32x4_t N2 = vdupq_n_f32(0.00950985f);
    const float32x4_t D0 = vdupq_n_f32(952.72399902f);
    const float32x4_t D1 = vdupq_n_f32(103.34200287f);
    const float32x4_t D2 = vdupq_n_f32(0.74287558f);
    const float32x4_t half = vdupq_n_f32(0.5f);
    const float32x4_t upper = vdupq_n_f32(1.f);
    const float32x4_t lower = vdupq_n_f32(0.f);
    const float32x4_t Xsq = vmulq_f32(X, X);
    /* Both polynomials are evaluated in x^2 using Horner's scheme. */
    const float32x4_t poly_n = vmlaq_f32(N0, Xsq, vmlaq_f32(N1, N2, Xsq));
    const float32x4_t poly_d = vmlaq_f32(D0, Xsq, vmlaq_f32(D1, D2, Xsq));
    /* 0.5 + (x*num) * (1/den) */
    const float32x4_t shifted = vmlaq_f32(half, vmulq_f32(poly_n, X), vrecpeq_f32(poly_d));
    return vmaxq_f32(lower, vminq_f32(upper, shifted));
}
/* Scalar exponential: x is duplicated into all four lanes, run through
   exp4_approx(), and lane 0 of the result is returned. */
static inline float lpcnet_exp(float x)
{
    float lanes[4];
    vst1q_f32(lanes, exp4_approx(vdupq_n_f32(x)));
    return lanes[0];
}
/* Scalar tanh(): x is duplicated into all four lanes, run through
   tanh4_approx(), and lane 0 of the result is returned. */
static inline float tanh_approx(float x)
{
    float lanes[4];
    vst1q_f32(lanes, tanh4_approx(vdupq_n_f32(x)));
    return lanes[0];
}
/* Scalar sigmoid: x is duplicated into all four lanes, run through
   sigmoid4_approx(), and lane 0 of the result is returned. */
static inline float sigmoid_approx(float x)
{
    float lanes[4];
    vst1q_f32(lanes, sigmoid4_approx(vdupq_n_f32(x)));
    return lanes[0];
}
/* Writes the approximate exponential of x[0..N-1] into y[0..N-1].
   Note that this function only applies the exponential; it does not
   normalize the outputs. */
static inline void softmax(float *y, const float *x, int N)
{
    int pos = 0;
    /* Complete groups of four go through the vector exponential. */
    while (pos < N-3)
    {
        vst1q_f32(&y[pos], exp4_approx(vld1q_f32(&x[pos])));
        pos += 4;
    }
    /* Whatever is left is handled one element at a time. */
    while (pos < N)
    {
        y[pos] = lpcnet_exp(x[pos]);
        pos++;
    }
}
/* Element-wise approximate tanh of x[0..N-1] into y[0..N-1].
   Complete groups of four use tanh4_approx(); the remaining elements are
   computed from the approximate exponential as (e^2x - 1)/(e^2x + 1). */
static inline void vec_tanh(float *y, const float *x, int N)
{
    int pos = 0;
    while (pos < N-3)
    {
        vst1q_f32(&y[pos], tanh4_approx(vld1q_f32(&x[pos])));
        pos += 4;
    }
    while (pos < N)
    {
        const float e2x = lpcnet_exp(2*x[pos]);
        y[pos] = (e2x-1)/(e2x+1);
        pos++;
    }
}
/* Element-wise approximate sigmoid of x[0..N-1] into y[0..N-1].
   Complete groups of four use sigmoid4_approx(); the remaining elements are
   computed from the approximate exponential as e^x/(e^x + 1). */
static inline void vec_sigmoid(float *y, const float *x, int N)
{
    int pos = 0;
    while (pos < N-3)
    {
        vst1q_f32(&y[pos], sigmoid4_approx(vld1q_f32(&x[pos])));
        pos += 4;
    }
    while (pos < N)
    {
        const float ex = lpcnet_exp(x[pos]);
        y[pos] = (ex)/(ex+1);
        pos++;
    }
}
#endif
/* Float matrix-vector product, 16 output rows per pass:
   out[i] = sum over j of weights[j*col_stride + i]*x[j].
   rows is stepped by 16 with no tail handling. */
static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
    int row, col;
    for (row=0;row<rows;row+=16)
    {
        float * restrict dst = &out[row];
        /* The 16 partial sums stay in four vector registers across the column loop. */
        float32x4_t acc0 = vdupq_n_f32(0);
        float32x4_t acc1 = vdupq_n_f32(0);
        float32x4_t acc2 = vdupq_n_f32(0);
        float32x4_t acc3 = vdupq_n_f32(0);
        for (col=0;col<cols;col++)
        {
            const float * restrict wcol = &weights[col*col_stride + row];
            const float32x4_t w0 = vld1q_f32(&wcol[0]);
            const float32x4_t w1 = vld1q_f32(&wcol[4]);
            const float32x4_t w2 = vld1q_f32(&wcol[8]);
            const float32x4_t w3 = vld1q_f32(&wcol[12]);
            const float32x4_t xcol = vld1q_dup_f32(&x[col]);
            acc0 = vmlaq_f32(acc0, w0, xcol);
            acc1 = vmlaq_f32(acc1, w1, xcol);
            acc2 = vmlaq_f32(acc2, w2, xcol);
            acc3 = vmlaq_f32(acc3, w3, xcol);
        }
        /* Write the 16 results out. */
        vst1q_f32(&dst[0], acc0);
        vst1q_f32(&dst[4], acc1);
        vst1q_f32(&dst[8], acc2);
        vst1q_f32(&dst[12], acc3);
    }
}
/* Float matrix-vector product, 8 output rows per pass:
   out[i] = sum over j of weights[j*col_stride + i]*x[j].
   rows is stepped by 8 with no tail handling. */
static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
    int row, col;
    for (row=0;row<rows;row+=8)
    {
        float * restrict dst = &out[row];
        /* The 8 partial sums (y[0..7]) stay in two vector registers across the column loop. */
        float32x4_t acc0 = vdupq_n_f32(0);
        float32x4_t acc1 = vdupq_n_f32(0);
        for (col=0;col<cols;col++)
        {
            const float * restrict wcol = &weights[col*col_stride + row];
            const float32x4_t w0 = vld1q_f32(&wcol[0]);
            const float32x4_t w1 = vld1q_f32(&wcol[4]);
            const float32x4_t xcol = vld1q_dup_f32(&x[col]);
            acc0 = vmlaq_f32(acc0, w0, xcol);
            acc1 = vmlaq_f32(acc1, w1, xcol);
        }
        /* Write the 8 results out. */
        vst1q_f32(&dst[0], acc0);
        vst1q_f32(&dst[4], acc1);
    }
}
/* Float matrix-vector product: out[i] = sum over j of weights[j*col_stride + i]*x[j].
   Picks the 16-row kernel when rows is a multiple of 16, the 8-row kernel
   when it is a multiple of 8, and plain scalar code otherwise. */
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
    int r, c;
    if ((rows&0xf) == 0)
    {
        sgemv16x1(out, weights, rows, cols, col_stride, x);
        return;
    }
    if ((rows&0x7) == 0)
    {
        sgemv8x1(out, weights, rows, cols, col_stride, x);
        return;
    }
    for (r=0;r<rows;r++)
    {
        out[r] = 0;
        for (c=0;c<cols;c++) out[r] += weights[c*col_stride + r]*x[c];
    }
}
/* Temporarily use unoptimized version */
/* Block-sparse float matrix-vector product with 8x4 blocks (plain C).
   out[0..rows-1] is cleared first. For each group of 8 output rows, idx
   supplies a block count followed by one input offset per block. Each block
   consumes 32 consecutive weights: four runs of 8, one per input
   x[offset+0..3], accumulated into the 8 outputs. rows is stepped by 8 with
   no tail handling. */
static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
{
    int row, blk;
    OPUS_CLEAR(out, rows);
    for (row=0;row<rows;row+=8)
    {
        const int nblocks = *idx++;
        for (blk=0;blk<nblocks;blk++)
        {
            const int offset = (*idx++);
            float * restrict dst = &out[row];
            float xin[4];
            int k, r;
            /* Read the four inputs before touching the outputs. */
            xin[0] = x[offset+0];
            xin[1] = x[offset+1];
            xin[2] = x[offset+2];
            xin[3] = x[offset+3];
            /* Input-major order: all 8 rows for input 0, then input 1, ... */
            for (k=0;k<4;k++)
            {
                for (r=0;r<8;r++)
                {
                    dst[r] += w[8*k + r]*xin[k];
                }
            }
            w += 32;
        }
    }
}
/* Scaling constant (128*127) and its reciprocal.
   NOTE(review): not referenced in the visible part of this header;
   presumably used by the code that includes it -- confirm. */
#define SCALE (128.f*127.f)
#define SCALE_1 (1.f/128.f/127.f)
/* Sizes the on-stack quantized-input buffers of cgemv8x4() and
   sparse_cgemv8x4() below (MAX_INPUTS/4 32-bit words, i.e. MAX_INPUTS bytes). */
#define MAX_INPUTS 2048
/* NOTE(review): not referenced in the visible part of this header;
   presumably an upper bound used by the code that includes it -- confirm. */
#define MAX_OUTPUTS 8192
#if __ARM_FEATURE_DOTPROD
/* Four-way int8 dot product accumulated into acc, using the native
   dot-product instruction through vdotq_s32(). */
static inline int32x4_t vdotprod(int32x4_t acc, int8x16_t a, int8x16_t b) {
    const int32x4_t updated = vdotq_s32(acc, a, b);
    return updated;
}
#else
/* Four-way int8 dot product accumulated into acc, for targets without the
   dot-product instruction: widening 8x8->16 multiplies, a pairwise add of
   the 16-bit products, then a pairwise widening accumulate into acc. */
static inline int32x4_t vdotprod(int32x4_t acc, int8x16_t a, int8x16_t b)
{
    const int16x8_t prod_lo = vmull_s8(vget_low_s8(a), vget_low_s8(b));
    const int16x8_t prod_hi = vmull_high_s8(a, b);
    const int16x8_t pair_sums = vpaddq_s16(prod_lo, prod_hi);
    return vpadalq_s16(acc, pair_sums);
}
#endif
/* Dense int8 matrix-vector product with 8x4 blocks.
   The float input _x is first multiplied by 127, converted to integers with
   vcvtnq_s32_f32() and narrowed to int8, eight values per pass (cols is
   stepped by 8 there). For each group of 8 output rows the weights are read
   sequentially, 32 bytes for every 4 input columns. The integer sums are
   converted to float and multiplied by scale[] before being stored.
   rows is stepped by 8 with no tail handling. */
static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
{
    int row, col;
    /* 32-bit storage so the byte buffer can be read one 32-bit word at a time. */
    opus_int32 xq_words[MAX_INPUTS/4];
    opus_int8 *xq = (opus_int8*) xq_words;
    const float32x4_t k127 = vdupq_n_f32(127.);
    for (col=0;col<cols;col+=8) {
        const int32x4_t q_lo = vcvtnq_s32_f32(vmulq_f32(k127, vld1q_f32(&_x[col])));
        const int32x4_t q_hi = vcvtnq_s32_f32(vmulq_f32(k127, vld1q_f32(&_x[col+4])));
        const int16x8_t q16 = vcombine_s16(vmovn_s32(q_lo), vmovn_s32(q_hi));
        vst1_s8(&xq[col], vmovn_s16(q16));
    }
    for (row=0;row<rows;row+=8)
    {
        /* sum_a*: rows 0-3, sum_b*: rows 4-7; two sets for the 8-column loop. */
        int32x4_t sum_a0 = vdupq_n_s32(0);
        int32x4_t sum_b0 = vdupq_n_s32(0);
        int32x4_t sum_a1 = vdupq_n_s32(0);
        int32x4_t sum_b1 = vdupq_n_s32(0);
        col = 0;
        /* Eight columns (two blocks of four) per pass. */
        while (col < cols-4)
        {
            int8x16_t wa0, wb0, wa1, wb1, xv0, xv1;
            xv0 = (int8x16_t)vld1q_dup_s32((int*)(void*)&xq[col]);
            wa0 = vld1q_s8(w);
            wb0 = vld1q_s8(&w[16]);
            sum_a0 = vdotprod(sum_a0, wa0, xv0);
            sum_b0 = vdotprod(sum_b0, wb0, xv0);
            xv1 = (int8x16_t)vld1q_dup_s32((int*)(void*)&xq[col+4]);
            wa1 = vld1q_s8(&w[32]);
            wb1 = vld1q_s8(&w[48]);
            sum_a1 = vdotprod(sum_a1, wa1, xv1);
            sum_b1 = vdotprod(sum_b1, wb1, xv1);
            w += 64;
            col += 8;
        }
        /* Merge the two accumulator sets. */
        sum_a0 = vaddq_s32(sum_a0, sum_a1);
        sum_b0 = vaddq_s32(sum_b0, sum_b1);
        /* Remaining columns, one block of four at a time. */
        while (col < cols)
        {
            int8x16_t wa, wb, xv;
            xv = (int8x16_t)vld1q_dup_s32((int*)(void*)&xq[col]);
            wa = vld1q_s8(w);
            wb = vld1q_s8(&w[16]);
            sum_a0 = vdotprod(sum_a0, wa, xv);
            sum_b0 = vdotprod(sum_b0, wb, xv);
            w += 32;
            col += 4;
        }
        /* Back to float, then apply the per-row scale. */
        vst1q_f32(&_out[row], vmulq_f32(vld1q_f32(&scale[row]), vcvtq_f32_s32(sum_a0)));
        vst1q_f32(&_out[row+4], vmulq_f32(vld1q_f32(&scale[row+4]), vcvtq_f32_s32(sum_b0)));
    }
}
/* Block-sparse int8 matrix-vector product with 8x4 blocks.
   The float input _x is first multiplied by 127, converted to integers with
   vcvtnq_s32_f32() and narrowed to int8, eight values per pass (cols is
   stepped by 8 there). For each group of 8 output rows, idx supplies a block
   count followed by one input offset per block; every block uses 32 weight
   bytes and the 4 input bytes at that offset. The integer sums are
   converted to float and multiplied by scale[] before being stored.
   rows is stepped by 8 with no tail handling. */
static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
{
    int row, blk;
    /* 32-bit storage so the byte buffer can be read one 32-bit word at a time. */
    opus_int32 xq_words[MAX_INPUTS/4];
    opus_int8 *xq = (opus_int8*) xq_words;
    const float32x4_t k127 = vdupq_n_f32(127.);
    for (row=0;row<cols;row+=8) {
        const int32x4_t q_lo = vcvtnq_s32_f32(vmulq_f32(k127, vld1q_f32(&_x[row])));
        const int32x4_t q_hi = vcvtnq_s32_f32(vmulq_f32(k127, vld1q_f32(&_x[row+4])));
        const int16x8_t q16 = vcombine_s16(vmovn_s32(q_lo), vmovn_s32(q_hi));
        vst1_s8(&xq[row], vmovn_s16(q16));
    }
    for (row=0;row<rows;row+=8)
    {
        /* sum_a: rows 0-3, sum_b: rows 4-7. */
        int32x4_t sum_a = vdupq_n_s32(0);
        int32x4_t sum_b = vdupq_n_s32(0);
        const int nblocks = *idx++;
        for (blk=0;blk<nblocks;blk++)
        {
            const int offset = (*idx++);
            const int8x16_t xv = (int8x16_t)vld1q_dup_s32((int*)(void*)&xq[offset]);
            const int8x16_t wa = vld1q_s8(w);
            const int8x16_t wb = vld1q_s8(&w[16]);
            sum_a = vdotprod(sum_a, wa, xv);
            sum_b = vdotprod(sum_b, wb, xv);
            w += 32;
        }
        /* Back to float, then apply the per-row scale. */
        vst1q_f32(&_out[row], vmulq_f32(vld1q_f32(&scale[row]), vcvtq_f32_s32(sum_a)));
        vst1q_f32(&_out[row+4], vmulq_f32(vld1q_f32(&scale[row+4]), vcvtq_f32_s32(sum_b)));
    }
}
#endif

View File

@ -0,0 +1,97 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include "nnet.h"
#include "os_support.h"
#include "arch.h"
/* This is a bit of a hack because we need to build nnet_data.c and plc_data.c without USE_WEIGHTS_FILE,
but USE_WEIGHTS_FILE is defined in config.h. */
#undef HAVE_CONFIG_H
#ifdef USE_WEIGHTS_FILE
#undef USE_WEIGHTS_FILE
#endif
#include "pitchdnn_data.c"
#include "fargan_data.c"
#include "plc_data.c"
#include "dred_rdovae_enc_data.c"
#include "dred_rdovae_dec_data.c"
#ifdef ENABLE_OSCE
#include "lace_data.c"
#include "nolace_data.c"
#endif
/* Appends every array of a NULL-name-terminated list to fout.
   Each array is written as one WEIGHT_BLOCK_SIZE-byte header (magic "DNNw",
   version, type, size, padded size, name) followed by the raw data and
   enough zero bytes to round the data up to a multiple of
   WEIGHT_BLOCK_SIZE.
   A name that does not fit in the header is truncated and a warning is
   printed. Write errors are not checked here; callers can test the stream
   with ferror() afterwards. */
void write_weights(const WeightArray *list, FILE *fout)
{
  int i;
  unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
  for (i=0; list[i].name != NULL; i++) {
    WeightHead h;
    /* h.name holds up to sizeof(h.name)-1 characters plus the terminator,
       so truncation only happens from sizeof(h.name) characters upwards. */
    if (strlen(list[i].name) >= sizeof(h.name)) {
      printf("[write_weights] warning: name %s too long\n", list[i].name);
    }
    memcpy(h.head, "DNNw", 4);
    h.version = WEIGHT_BLOB_VERSION;
    h.type = list[i].type;
    h.size = list[i].size;
    /* Data size rounded up to the next multiple of the block size. */
    h.block_size = (h.size+WEIGHT_BLOCK_SIZE-1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
    OPUS_CLEAR(h.name, sizeof(h.name));
    strncpy(h.name, list[i].name, sizeof(h.name));
    /* strncpy() leaves the buffer unterminated when the name fills it. */
    h.name[sizeof(h.name)-1] = 0;
    /* The header is dumped as raw bytes and must fill exactly one block. */
    celt_assert(sizeof(h) == WEIGHT_BLOCK_SIZE);
    fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
    fwrite(list[i].data, 1, h.size, fout);
    fwrite(zeros, 1, h.block_size-h.size, fout);
  }
}
/* Writes all the compiled-in model weights to "weights_blob.bin" in the
   current directory. Returns 0 on success and 1 if the file cannot be
   opened or if writing/closing it fails. */
int main(void)
{
  int ret = 0;
  /* "wb": the blob is binary data; text mode would alter it on platforms
     that translate newlines. */
  FILE *fout = fopen("weights_blob.bin", "wb");
  if (fout == NULL) {
    perror("weights_blob.bin");
    return 1;
  }
  write_weights(pitchdnn_arrays, fout);
  write_weights(fargan_arrays, fout);
  write_weights(plcmodel_arrays, fout);
  write_weights(rdovaeenc_arrays, fout);
  write_weights(rdovaedec_arrays, fout);
#ifdef ENABLE_OSCE
#ifndef DISABLE_LACE
  write_weights(lacelayers_arrays, fout);
#endif
#ifndef DISABLE_NOLACE
  write_weights(nolacelayers_arrays, fout);
#endif
#endif
  /* write_weights() does not report failures, so check the stream state. */
  if (ferror(fout)) {
    fprintf(stderr, "weights_blob.bin: write error\n");
    ret = 1;
  }
  /* fclose() flushes buffered data and can fail too. */
  if (fclose(fout) != 0) {
    perror("weights_blob.bin");
    ret = 1;
  }
  return ret;
}

View File

@ -0,0 +1,121 @@
/* Copyright (c) 2011-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DNN_X86_H
#define DNN_X86_H
#include "cpu_support.h"
#include "opus_types.h"
/* Per-instruction-set entry points for the three DNN primitives (linear
   layer, activation, 2-D convolution). Each group is declared only when the
   matching OPUS_X86_MAY_HAVE_* macro is defined. The signatures are the
   generic ones minus the trailing arch argument (see the compute_*() macros
   below). */
#if defined(OPUS_X86_MAY_HAVE_SSE2)
void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
void compute_activation_sse2(float *output, const float *input, int N, int activation);
void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
void compute_activation_sse4_1(float *output, const float *input, int N, int activation);
void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
#endif
#if defined(OPUS_X86_MAY_HAVE_AVX2)
void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
void compute_activation_avx2(float *output, const float *input, int N, int activation);
void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
#endif
/* Selection of the implementation behind compute_linear(),
   compute_activation() and compute_conv2d(). Each branch also defines the
   matching OVERRIDE_COMPUTE_* macro. */
/* AVX2 is presumed at build time: bind directly to the AVX2 functions; arch
   is evaluated but otherwise ignored. */
#if defined(OPUS_X86_PRESUME_AVX2)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_avx2(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation))
/* SSE4.1 is presumed and AVX2 code is not built at all: bind directly to the
   SSE4.1 functions. */
#elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse4_1(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation))
/* SSE2 is presumed and neither AVX2 nor SSE4.1 code is built: bind directly
   to the SSE2 functions. */
#elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse2(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation))
/* Run-time CPU detection: each call goes through a table of function
   pointers indexed by (arch & OPUS_ARCHMASK). The tables are only declared
   here; they are defined elsewhere. */
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
);
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
);
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
);
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
#endif
#endif /* DNN_X86_H */

View File

@ -0,0 +1,40 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "x86/x86_arch_macros.h"
#ifndef __AVX2__
#error nnet_avx2.c is being compiled without AVX2 enabled
#endif
#define RTCD_ARCH avx2
#include "nnet_arch.h"

View File

@ -0,0 +1,40 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "x86/x86_arch_macros.h"
/* Build guard: refuse to compile this file unless the compiler is generating
   SSE2 code (i.e. __SSE2__ is defined). */
#ifndef __SSE2__
#error nnet_sse2.c is being compiled without SSE2 enabled
#endif
/* Select the "sse2" variant before including the shared implementation header.
   NOTE(review): nnet_arch.h is not visible here; presumably it uses RTCD_ARCH
   to suffix the functions it defines -- confirm. */
#define RTCD_ARCH sse2
#include "nnet_arch.h"

View File

@ -0,0 +1,40 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "x86/x86_arch_macros.h"
/* Build guard: refuse to compile this file unless the compiler is generating
   SSE4.1 code (i.e. __SSE4_1__ is defined). */
#ifndef __SSE4_1__
#error nnet_sse4_1.c is being compiled without SSE4.1 enabled
#endif
/* Select the "sse4_1" variant before including the shared implementation header.
   NOTE(review): nnet_arch.h is not visible here; presumably it uses RTCD_ARCH
   to suffix the functions it defines -- confirm. */
#define RTCD_ARCH sse4_1
#include "nnet_arch.h"

View File

@ -0,0 +1,83 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "x86/x86cpu.h"
#include "nnet.h"
#if defined(OPUS_HAVE_RTCD)
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_AVX2))
/* Implementation table for compute_linear(). The first two slots use the plain
   C version; the following slots go through the MAY_HAVE_*() wrappers.
   NOTE(review): the lookup macro for this table is not visible here; presumably
   it indexes with (arch) & OPUS_ARCHMASK like the other DNN tables -- confirm.
   Only five entries are initialised; any further slots implied by
   OPUS_ARCHMASK + 1 would be zero-initialised (NULL). */
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
) = {
compute_linear_c, /* non-sse */
compute_linear_c,
MAY_HAVE_SSE2(compute_linear), /* sse2 */
MAY_HAVE_SSE4_1(compute_linear), /* sse4.1 */
MAY_HAVE_AVX2(compute_linear) /* avx2 */
};
/* Implementation table for compute_activation(), indexed by
   (arch) & OPUS_ARCHMASK. The first two slots use the plain C version; the
   following slots go through the MAY_HAVE_*() wrappers.
   Only five entries are initialised; any further slots implied by
   OPUS_ARCHMASK + 1 would be zero-initialised (NULL). */
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
) = {
compute_activation_c, /* non-sse */
compute_activation_c,
MAY_HAVE_SSE2(compute_activation), /* sse2 */
MAY_HAVE_SSE4_1(compute_activation), /* sse4.1 */
MAY_HAVE_AVX2(compute_activation) /* avx2 */
};
/* Implementation table for compute_conv2d(), indexed by
   (arch) & OPUS_ARCHMASK. The first two slots use the plain C version; the
   following slots go through the MAY_HAVE_*() wrappers.
   Only five entries are initialised; any further slots implied by
   OPUS_ARCHMASK + 1 would be zero-initialised (NULL). */
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
) = {
compute_conv2d_c, /* non-sse */
compute_conv2d_c,
MAY_HAVE_SSE2(compute_conv2d), /* sse2 */
MAY_HAVE_SSE4_1(compute_conv2d), /* sse4.1 */
MAY_HAVE_AVX2(compute_conv2d) /* avx2 */
};
#endif
#endif

View File

@ -0,0 +1,44 @@
# Header lists for the dnn/ directory, one variable per feature group.
# NOTE(review): the build files that consume these variables are not part of
# this fragment.

# Core DNN headers, including the x86- and ARM-specific ones.
DEEP_PLC_HEAD = \
dnn/lpcnet.h \
dnn/burg.h \
dnn/common.h \
dnn/freq.h \
dnn/fargan.h \
dnn/fargan_data.h \
dnn/lpcnet_private.h \
dnn/nnet.h \
dnn/plc_data.h \
dnn/vec.h \
dnn/vec_avx.h \
dnn/vec_neon.h \
dnn/pitchdnn.h \
dnn/pitchdnn_data.h \
dnn/x86/dnn_x86.h \
dnn/nnet_arch.h \
dnn/arm/dnn_arm.h
# DRED headers (all dnn/dred_*.h).
DRED_HEAD = \
dnn/dred_coding.h \
dnn/dred_config.h \
dnn/dred_decoder.h \
dnn/dred_encoder.h \
dnn/dred_rdovae.h \
dnn/dred_rdovae_constants.h \
dnn/dred_rdovae_enc.h \
dnn/dred_rdovae_enc_data.h \
dnn/dred_rdovae_dec.h \
dnn/dred_rdovae_dec_data.h \
dnn/dred_rdovae_stats_data.h
# OSCE headers, plus nndsp.h and the lace/nolace data headers.
OSCE_HEAD= \
dnn/osce.h \
dnn/osce_config.h \
dnn/osce_structs.h \
dnn/osce_features.h \
dnn/nndsp.h \
dnn/lace_data.h \
dnn/nolace_data.h
# Loss generator headers.
LOSSGEN_HEAD = \
dnn/lossgen.h \
dnn/lossgen_data.h

View File

@ -0,0 +1,44 @@
# Source lists for the dnn/ directory, one variable per feature group.
# NOTE(review): the build files that consume these variables are not part of
# this fragment.

# Core DNN sources.
DEEP_PLC_SOURCES = \
dnn/burg.c \
dnn/freq.c \
dnn/fargan.c \
dnn/fargan_data.c \
dnn/lpcnet_enc.c \
dnn/lpcnet_plc.c \
dnn/lpcnet_tables.c \
dnn/nnet.c \
dnn/nnet_default.c \
dnn/plc_data.c \
dnn/parse_lpcnet_weights.c \
dnn/pitchdnn.c \
dnn/pitchdnn_data.c
# DRED sources (all dnn/dred_*.c).
DRED_SOURCES = \
dnn/dred_rdovae_enc.c \
dnn/dred_rdovae_enc_data.c \
dnn/dred_rdovae_dec.c \
dnn/dred_rdovae_dec_data.c \
dnn/dred_rdovae_stats_data.c \
dnn/dred_encoder.c \
dnn/dred_coding.c \
dnn/dred_decoder.c
# OSCE sources, plus nndsp.c and the lace/nolace data.
OSCE_SOURCES = \
dnn/osce.c \
dnn/osce_features.c \
dnn/nndsp.c \
dnn/lace_data.c \
dnn/nolace_data.c
# Loss generator sources.
LOSSGEN_SOURCES = \
dnn/lossgen.c \
dnn/lossgen_data.c
# x86: dispatch tables, then one source file per instruction-set level.
# The per-ISA files contain an #error guard, so each must be compiled with the
# matching instruction set enabled.
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c
# ARM counterparts (dispatch map, dotprod and NEON variants).
DNN_SOURCES_ARM_RTCD = dnn/arm/arm_dnn_map.c
DNN_SOURCES_DOTPROD = dnn/arm/nnet_dotprod.c
DNN_SOURCES_NEON = dnn/arm/nnet_neon.c

View File

@ -0,0 +1,85 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
2023 Amazon
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "SigProc_FLP.h"
#include <immintrin.h>
/* Dot product of two silk_float arrays of length dataSize, accumulated and
   returned in double precision. Uses unaligned loads, so the inputs need no
   particular alignment. */
double silk_inner_product_FLP_avx2(
    const silk_float    *data1,
    const silk_float    *data2,
    opus_int            dataSize
)
{
    opus_int i = 0;
    __m256d  accum1 = _mm256_setzero_pd();
    __m256d  accum2 = _mm256_setzero_pd();
    double   result;

    /* Main loop: 8 samples per iteration, split over two independent
       4-lane double accumulators. */
    while( i < dataSize - 7 ) {
        const __m256d a_lo = _mm256_cvtps_pd( _mm_loadu_ps( &data1[ i ] ) );
        const __m256d b_lo = _mm256_cvtps_pd( _mm_loadu_ps( &data2[ i ] ) );
        const __m256d a_hi = _mm256_cvtps_pd( _mm_loadu_ps( &data1[ i + 4 ] ) );
        const __m256d b_hi = _mm256_cvtps_pd( _mm_loadu_ps( &data2[ i + 4 ] ) );
        accum1 = _mm256_fmadd_pd( a_lo, b_lo, accum1 );
        accum2 = _mm256_fmadd_pd( a_hi, b_hi, accum2 );
        i += 8;
    }

    /* At most one more group of 4 samples. */
    while( i < dataSize - 3 ) {
        const __m256d a = _mm256_cvtps_pd( _mm_loadu_ps( &data1[ i ] ) );
        const __m256d b = _mm256_cvtps_pd( _mm_loadu_ps( &data2[ i ] ) );
        accum1 = _mm256_fmadd_pd( a, b, accum1 );
        i += 4;
    }

    /* Horizontal reduction: merge the two accumulators, add the swapped
       128-bit halves, then add adjacent lanes; lane 0 holds the total. */
    accum1 = _mm256_add_pd( accum1, accum2 );
    accum1 = _mm256_add_pd( accum1, _mm256_permute2f128_pd( accum1, accum1, 1 ) );
    accum1 = _mm256_hadd_pd( accum1, accum1 );
    result = _mm256_cvtsd_f64( accum1 );

    /* Scalar tail for the last 0..3 samples. */
    while( i < dataSize ) {
        result += data1[ i ] * (double)data2[ i ];
        i++;
    }

    return result;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,315 @@
/* Copyright (c) 2022 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "opus_types.h"
#include "opus_defines.h"
#include "arch.h"
#include "os_support.h"
#include "opus_private.h"
/* Step over the extension that *data points at.
   data:        in/out cursor into the extension payload; advanced past the
                extension on success.
   len:         number of bytes available at *data.
   header_size: out, number of header bytes of the skipped extension (the ID
                byte plus any length bytes). Not written when len is 0 or when
                a short extension is truncated.
   Returns the number of bytes left after the extension, or -1 if the
   extension does not fit in len bytes. */
opus_int32 skip_extension(const unsigned char **data, opus_int32 len, opus_int32 *header_size)
{
   int ext_id, l_flag;
   opus_int32 payload_bytes;
   if (len==0)
      return 0;
   ext_id = **data>>1;
   l_flag = **data&1;
   /* ID 0 with L=1: a single padding byte. */
   if (ext_id == 0 && l_flag == 1)
   {
      *header_size = 1;
      if (len < 1)
         return -1;
      (*data)++;
      return len-1;
   }
   /* IDs 1 to 31: the ID byte is followed by exactly L payload bytes. */
   if (ext_id > 0 && ext_id < 32)
   {
      if (len < 1+l_flag)
         return -1;
      *data += 1+l_flag;
      *header_size = 1;
      return len - (1+l_flag);
   }
   /* Everything else (ID 0 with L=0, and IDs 32 to 127). */
   if (l_flag == 0)
   {
      /* L=0: the extension takes up all the remaining bytes. */
      *data += len;
      *header_size = 1;
      return 0;
   }
   /* L=1: an explicit length follows, coded as a run of 255-valued bytes
      closed by one byte below 255; the byte values are summed. */
   payload_bytes = 0;
   *header_size = 1;
   for (;;)
   {
      (*data)++;
      len--;
      if (len == 0)
         return -1;
      payload_bytes += **data;
      (*header_size)++;
      if (**data != 255)
         break;
   }
   /* Move past the final length byte. */
   (*data)++;
   len--;
   if (payload_bytes > len)
      return -1;
   *data += payload_bytes;
   return len - payload_bytes;
}
/* Return how many extensions with an ID above 1 (i.e. neither padding nor
   frame separators) are present in the len bytes at data, or
   OPUS_INVALID_PACKET if an extension overruns the buffer. */
opus_int32 opus_packet_extensions_count(const unsigned char *data, opus_int32 len)
{
   const unsigned char *cursor;
   opus_int32 remaining;
   opus_int32 nb_found;
   celt_assert(len >= 0);
   celt_assert(data != NULL || len == 0);
   cursor = data;
   nb_found = 0;
   for (remaining = len; remaining > 0; )
   {
      opus_int32 hdr_bytes;
      /* Read the ID before skip_extension() moves the cursor. */
      int ext_id = *cursor>>1;
      remaining = skip_extension(&cursor, remaining, &hdr_bytes);
      if (remaining < 0)
         return OPUS_INVALID_PACKET;
      if (ext_id > 1)
         nb_found++;
   }
   return nb_found;
}
/* Extract the extensions (ID above 1) found in the len bytes at data.
   extensions:    output array; each entry's data pointer refers into the input
                  buffer, nothing is copied.
   nb_extensions: in, capacity of extensions; out, number of entries filled in.
   Returns OPUS_OK on success, OPUS_INVALID_PACKET if an extension overruns the
   buffer or the frame index reaches 48, and OPUS_BUFFER_TOO_SMALL when the
   output array is full. Entries written before an error remain valid. */
opus_int32 opus_packet_extensions_parse(const unsigned char *data, opus_int32 len, opus_extension_data *extensions, opus_int32 *nb_extensions)
{
   const unsigned char *cursor;
   opus_int32 remaining;
   opus_int32 nb_found;
   int frame_idx;
   celt_assert(len >= 0);
   celt_assert(data != NULL || len == 0);
   celt_assert(nb_extensions != NULL);
   celt_assert(extensions != NULL || *nb_extensions == 0);
   cursor = data;
   remaining = len;
   nb_found = 0;
   frame_idx = 0;
   while (remaining > 0)
   {
      opus_extension_data pending;
      opus_int32 hdr_bytes;
      int ext_id = *cursor>>1;
      if (ext_id > 1)
      {
         /* Real extension: remember where it starts; its length is only
            known once it has been skipped. */
         pending.id = ext_id;
         pending.frame = frame_idx;
         pending.data = cursor;
      } else if (ext_id == 1)
      {
         /* Frame separator: L=0 advances by one frame, L=1 by the value of
            the following byte. */
         if ((*cursor&1) == 0)
            frame_idx++;
         else if (remaining >= 2)
            frame_idx += cursor[1];
         /* With L=1 and no byte left we are at the end, so the frame index
            no longer matters. */
         if (frame_idx >= 48)
         {
            *nb_extensions = nb_found;
            return OPUS_INVALID_PACKET;
         }
      }
      remaining = skip_extension(&cursor, remaining, &hdr_bytes);
      if (remaining < 0)
      {
         *nb_extensions = nb_found;
         return OPUS_INVALID_PACKET;
      }
      celt_assert(cursor - data == len - remaining);
      if (ext_id <= 1)
         continue;
      if (nb_found == *nb_extensions)
      {
         return OPUS_BUFFER_TOO_SMALL;
      }
      /* Strip the header so that data/len describe the payload only. */
      pending.data += hdr_bytes;
      pending.len = cursor - pending.data;
      extensions[nb_found++] = pending;
   }
   celt_assert(remaining == 0);
   *nb_extensions = nb_found;
   return OPUS_OK;
}
/* Serialize a list of extensions into an extension payload.
   data:          output buffer, or NULL to only compute the size (every write
                  is skipped when data is NULL).
   len:           capacity of the output in bytes.
   extensions:    extensions to write. They may be listed in any frame order;
                  they are emitted grouped by increasing frame index.
   nb_extensions: number of entries in extensions.
   pad:           when non-zero, the output is padded to exactly len bytes.
   Returns the number of bytes produced, OPUS_BAD_ARG for an invalid ID
   (outside 2..127), frame index (outside 0..47) or payload length, or
   OPUS_BUFFER_TOO_SMALL if the output does not fit in len bytes. */
opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, opus_int32 nb_extensions, int pad)
{
   int max_frame=0;
   opus_int32 i;
   int frame;
   int curr_frame = 0;
   opus_int32 pos = 0;
   opus_int32 written = 0;
   celt_assert(len >= 0);
   for (i=0;i<nb_extensions;i++)
   {
      /* A negative frame index would never match the frame loop below, so the
         extension would be dropped silently; reject it instead. */
      if (extensions[i].frame < 0)
         return OPUS_BAD_ARG;
      max_frame = IMAX(max_frame, extensions[i].frame);
      if (extensions[i].id < 2 || extensions[i].id > 127)
         return OPUS_BAD_ARG;
   }
   if (max_frame >= 48) return OPUS_BAD_ARG;
   for (frame=0;frame<=max_frame;frame++)
   {
      for (i=0;i<nb_extensions;i++)
      {
         if (extensions[i].frame == frame)
         {
            /* Insert separator when needed. */
            if (frame != curr_frame) {
               int diff = frame - curr_frame;
               if (len-pos < 2)
                  return OPUS_BUFFER_TOO_SMALL;
               if (diff == 1) {
                  /* ID 1, L=0: advance by exactly one frame. */
                  if (data) data[pos] = 0x02;
                  pos++;
               } else {
                  /* ID 1, L=1: the next byte holds the frame increment. */
                  if (data) data[pos] = 0x03;
                  pos++;
                  if (data) data[pos] = diff;
                  pos++;
               }
               curr_frame = frame;
            }
            if (extensions[i].id < 32)
            {
               /* Short extension: the L bit doubles as the payload length
                  (0 or 1 byte). */
               if (extensions[i].len < 0 || extensions[i].len > 1)
                  return OPUS_BAD_ARG;
               if (len-pos < extensions[i].len+1)
                  return OPUS_BUFFER_TOO_SMALL;
               if (data) data[pos] = (extensions[i].id<<1) + extensions[i].len;
               pos++;
               if (extensions[i].len > 0) {
                  if (data) data[pos] = extensions[i].data[0];
                  pos++;
               }
            } else {
               int last;
               opus_int32 length_bytes;
               if (extensions[i].len < 0)
                  return OPUS_BAD_ARG;
               /* The very last extension is coded with L=0 ("runs to the end")
                  and therefore needs no length bytes. */
               last = (written == nb_extensions - 1);
               length_bytes = 1 + extensions[i].len/255;
               if (last)
                  length_bytes = 0;
               if (len-pos < 1 + length_bytes + extensions[i].len)
                  return OPUS_BUFFER_TOO_SMALL;
               if (data) data[pos] = (extensions[i].id<<1) + !last;
               pos++;
               if (!last)
               {
                  /* Length coding: len/255 bytes of 255, then len%255. */
                  opus_int32 j;
                  for (j=0;j<extensions[i].len/255;j++) {
                     if (data) data[pos] = 255;
                     pos++;
                  }
                  if (data) data[pos] = extensions[i].len % 255;
                  pos++;
               }
               /* Skip the copy for empty payloads so that a NULL source
                  pointer is never handed to the copy routine. */
               if (data && extensions[i].len > 0) OPUS_COPY(&data[pos], extensions[i].data, extensions[i].len);
               pos += extensions[i].len;
            }
            written++;
         }
      }
   }
   /* If we need to pad, just prepend 0x01 bytes. Even better would be to fill the
      end with zeros, but that requires checking that turning the last extension into
      an L=1 case still fits. */
   if (pad && pos < len)
   {
      opus_int32 padding = len - pos;
      if (data) {
         OPUS_MOVE(data+padding, data, pos);
         for (i=0;i<padding;i++)
            data[i] = 0x01;
      }
      pos += padding;
   }
   return pos;
}
/* Disabled (#if 0) stand-alone demo: generates a 32-byte padded extension
   block from four sample extensions, hex-dumps it, then counts and parses it
   back and prints each parsed extension. Never compiled. */
#if 0
#include <stdio.h>
int main()
{
opus_extension_data ext[] = {{2, 0, (const unsigned char *)"a", 1},
{32, 10, (const unsigned char *)"DRED", 4},
{33, 1, (const unsigned char *)"NOT DRED", 8},
{3, 4, (const unsigned char *)NULL, 0}
};
opus_extension_data ext2[10];
int i, len;
int nb_ext = 10;
unsigned char packet[10000];
len = opus_packet_extensions_generate(packet, 32, ext, 4, 1);
/* Hex dump, 16 bytes per line. */
for (i=0;i<len;i++)
{
printf("%#04x ", packet[i]);
if (i%16 == 15)
printf("\n");
}
printf("\n");
printf("count = %d\n", opus_packet_extensions_count(packet, len));
opus_packet_extensions_parse(packet, len, ext2, &nb_ext);
/* One line per parsed extension: id, frame, payload bytes, payload length. */
for (i=0;i<nb_ext;i++)
{
int j;
printf("%d %d {", ext2[i].id, ext2[i].frame);
for (j=0;j<ext2[i].len;j++) printf("%#04x ", ext2[i].data[j]);
printf("} %d\n", ext2[i].len);
}
}
#endif

View File

@ -0,0 +1,107 @@
/* Copyright (c) 2023 Amazon
Written by Michael Klingbeil */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifndef _WIN32
#include <unistd.h>
#else
#include <process.h>
#define getpid _getpid
#endif
/* including sources directly to test internal APIs */
#define CELT_C /* to make celt_assert work */
#include "opus.h"
#include "test_opus_common.h"
#define NB_RANDOM_EXTENSIONS 10000000
#define MAX_EXTENSION_SIZE 200
#define MAX_NB_EXTENSIONS 100
/* Fuzz the DRED parser: feed NB_RANDOM_EXTENSIONS random payloads of 0 to
   MAX_EXTENSION_SIZE bytes to opus_dred_parse(). Whenever parsing returns a
   positive value, opus_dred_process() (run in place) must succeed and the
   returned value must not be smaller than dred_end. */
void test_random_dred(void)
{
   int error;
   int i;
   OpusDREDDecoder *dred_dec;
   OpusDRED *dred;
   dred_dec = opus_dred_decoder_create(&error);
   expect_true(error == OPUS_OK, "opus_dred_decoder_create() failed");
   dred = opus_dred_alloc(&error);
   /* The message names the function that was actually called. */
   expect_true(error == OPUS_OK, "opus_dred_alloc() failed");
   for (i=0;i<NB_RANDOM_EXTENSIONS;i++)
   {
      unsigned char payload[MAX_EXTENSION_SIZE];
      int len;
      int j;
      int res1, res2;
      int dred_end;
      /* len may equal MAX_EXTENSION_SIZE, which fills payload[] exactly. */
      len = fast_rand()%(MAX_EXTENSION_SIZE+1);
      for (j=0;j<len;j++)
         payload[j] = fast_rand()&0xFF;
      /* The last argument is chosen at random (0 or 1) on every iteration. */
      res1 = opus_dred_parse(dred_dec, dred, payload, len, 48000, 48000, &dred_end, fast_rand()&0x1);
      if (res1 > 0)
      {
         res2 = opus_dred_process(dred_dec, dred, dred);
         expect_true(res2 == OPUS_OK, "process should succeed if parse succeeds");
         expect_true(res1 >= dred_end, "end before beginning");
      }
   }
   opus_dred_free(dred);
   opus_dred_decoder_destroy(dred_dec);
}
/* Test driver. The random seed is taken from argv[1] if present, otherwise
   from the SEED environment variable, otherwise derived from the current time
   and process id. */
int main(int argc, char **argv)
{
   char *env_seed = getenv("SEED");
   int env_used = 0;

   if (argc > 1) {
      iseed = atoi(argv[1]);
   } else if (env_seed) {
      iseed = atoi(env_seed);
      env_used = 1;
   } else {
      iseed = (opus_uint32)time(NULL)^(((opus_uint32)getpid()&65535)<<16);
   }
   Rw = Rz = iseed;

   fprintf(stderr,"Testing dred. Random seed: %u (%.4X)\n", iseed, fast_rand() % 65535);
   if (env_used) {
      fprintf(stderr,"  Random seed set from the environment (SEED=%s).\n", env_seed);
   }

   test_random_dred();

   fprintf(stderr,"Tests completed successfully.\n");
   return 0;
}

View File

@ -0,0 +1,450 @@
/* Copyright (c) 2023 Amazon
Written by Michael Klingbeil */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifndef _WIN32
#include <unistd.h>
#else
#include <process.h>
#define getpid _getpid
#endif
#include "../src/opus_private.h"
#include "test_opus_common.h"
/* Generate four extensions into a 27-byte padded block and check the exact
   byte layout: padding first, then the extensions grouped by increasing frame
   index (0, 1, 4, 10) regardless of their order in ext[]. */
void test_extensions_generate_success(void)
{
   static const opus_extension_data ext[] = {
      {2, 0, (const unsigned char *)"a", 1},
      {32, 10, (const unsigned char *)"DRED", 4},
      {33, 1, (const unsigned char *)"NOT DRED", 8},
      {3, 4, (const unsigned char *)NULL, 0}
   };
   unsigned char packet[32];
   const unsigned char *cur;
   int written;

   written = opus_packet_extensions_generate(packet, 23+4, ext, 4, 1);
   expect_true(written == 23+4, "expected length 23+4");

   /* The 4 surplus bytes are prepended as 0x01 padding. */
   cur = packet;
   expect_true(cur[0] == 1 && cur[1] == 1 && cur[2] == 1 && cur[3] == 1, "expected padding");
   cur += 4;

   /* Frame 0: ID 2. For IDs 1 through 31 the L bit is the payload length,
      so L=1 means exactly one data byte follows. */
   expect_true((cur[0] >> 1) == 2, "expected extension id 2");
   expect_true((cur[0] & 0x01) == 1, "expected L-bit set");
   expect_true(cur[1] == 'a', "expected extension content");
   cur += 2;

   /* Separator to frame 1: ID=1, L=0 advances by a single frame. */
   expect_true(cur[0] == 0x02, "bad frame separator");
   cur += 1;

   /* Frame 1: ID 33. For IDs 32 to 127, L=1 announces a length indicator
      (L=0 would mean "payload runs to the end of the padding"). */
   expect_true((cur[0] >> 1) == 33, "expected extension id 33");
   expect_true((cur[0] & 0x01) == 1, "expected L-bit set");
   expect_true(cur[1] == ext[2].len, "expected length");
   cur += 2;
   expect_true(0 == memcmp(cur, ext[2].data, ext[2].len), "expected extension content");
   cur += ext[2].len;

   /* Separator to frame 4: ID=1, L=1 followed by the increment (3). */
   expect_true(cur[0] == 0x03, "bad frame separator");
   expect_true(cur[1] == 0x03, "bad frame increment");
   cur += 2;

   /* Frame 4: ID 3 with L=0, i.e. no payload byte. */
   expect_true((cur[0] >> 1) == 3, "expected extension id 3");
   expect_true((cur[0] & 0x01) == 0, "expected L-bit unset");
   cur += 1;

   /* Separator to frame 10: ID=1, L=1 followed by the increment (6). */
   expect_true(cur[0] == 0x03, "bad frame separator");
   expect_true(cur[1] == 0x06, "bad frame increment");
   cur += 2;

   /* Frame 10: ID 32 is the last extension written, so it is coded with L=0
      and its payload takes up the rest of the padding. */
   expect_true((cur[0] >> 1) == 32, "expected extension id 32");
   expect_true((cur[0] & 0x01) == 0, "expected L-bit unset");
   cur += 1;
   expect_true(0 == memcmp(cur, ext[1].data, ext[1].len), "expected extension content");
}
/* With no extensions and a zero-length output, generation must produce
   nothing, even with padding requested. */
void test_extensions_generate_zero(void)
{
   unsigned char packet[32];
   int written = opus_packet_extensions_generate(packet, 0, NULL, 0, 1);
   expect_true(written == 0, "expected length 0");
}
/* Without padding, the four sample extensions occupy 23 bytes even though the
   output buffer is larger. */
void test_extensions_generate_no_padding(void)
{
   static const opus_extension_data ext[] = {
      {2, 0, (const unsigned char *)"a", 1},
      {32, 10, (const unsigned char *)"DRED", 4},
      {33, 1, (const unsigned char *)"NOT DRED", 8},
      {3, 4, (const unsigned char *)NULL, 0}
   };
   unsigned char packet[32];
   int written = opus_packet_extensions_generate(packet, sizeof(packet), ext, 4, 0);
   expect_true(written == 23, "expected length 23");
}
/* Check every rejection path of opus_packet_extensions_generate(): output too
   small, ID out of range, frame index out of range and invalid payload sizes. */
void test_extensions_generate_fail(void)
{
   static const opus_extension_data ext[] = {
      {2, 0, (const unsigned char *)"a", 1},
      {32, 10, (const unsigned char *)"DRED", 4},
      {33, 1, (const unsigned char *)"NOT DRED", 8},
      {3, 4, (const unsigned char *)NULL, 0}
   };
   unsigned char buf[100];
   int rc;

   /* Four bytes cannot hold the four sample extensions. */
   rc = opus_packet_extensions_generate(buf, 4, ext, 4, 1);
   expect_true(rc == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");

   /* ID above 127. */
   {
      static const opus_extension_data id_too_big[] = {
         {256, 0, (const unsigned char *)"a", 1},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), id_too_big, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }

   /* ID below 2 (ID 1 is the frame separator). */
   {
      static const opus_extension_data id_too_small[] = {
         {1, 0, (const unsigned char *)"a", 1},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), id_too_small, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }

   /* Frame index 48 is one past the highest accepted value. */
   {
      static const opus_extension_data frame_too_big[] = {
         {33, 48, (const unsigned char *)"a", 1},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), frame_too_big, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }

   /* IDs 1 through 31 carry at most one payload byte. */
   {
      static const opus_extension_data size_too_big[] = {
         {2, 0, (const unsigned char *)"abcd", 4},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), size_too_big, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }

   /* Negative payload size with a short ID (1 through 31). */
   {
      static const opus_extension_data neg_size[] = {
         {2, 0, NULL, -4},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), neg_size, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }

   /* Negative payload size with a long ID (32 through 127). */
   {
      static const opus_extension_data neg_size_33[] = {
         {33, 0, NULL, -4},
      };
      rc = opus_packet_extensions_generate(buf, sizeof(buf), neg_size_33, 1, 1);
      expect_true(rc == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
   }
}
void test_extensions_parse_success(void)
{
static const opus_extension_data ext[] = {
{2, 0, (const unsigned char *)"a", 1},
{32, 10, (const unsigned char *)"DRED", 4},
{33, 1, (const unsigned char *)"NOT DRED", 8},
{3, 4, (const unsigned char *)NULL, 0}
};
opus_extension_data ext_out[10];
int nb_ext;
int len, result;
unsigned char packet[32];
nb_ext = 10;
len = opus_packet_extensions_generate(packet, 32, ext, 4, 1);
expect_true(len == 32, "expected length 32");
result = opus_packet_extensions_count(packet, len);
expect_true(result == 4, "expected opus_packet_extensions_count 4");
result = opus_packet_extensions_parse(packet, len, ext_out, &nb_ext);
expect_true(nb_ext == 4, "expected 4 extensions");
expect_true(ext_out[0].id == 2, "expected id 2");
expect_true(ext_out[0].frame == 0, "expected frame 0");
expect_true(ext_out[0].len == 1, "expected len 1");
expect_true(0 == memcmp(ext_out[0].data, ext[0].data, 1), "expected data");
expect_true(ext_out[1].id == 33, "expected id 33");
expect_true(ext_out[1].frame == 1, "expected frame 1");
expect_true(ext_out[1].len == 8, "expected len 8");
expect_true(0 == memcmp(ext_out[1].data, ext[2].data, 8), "expected data");
expect_true(ext_out[2].id == 3, "expected id 3");
expect_true(ext_out[2].frame == 4, "expected frame 4");
expect_true(ext_out[2].len == 0, "expected len 0");
expect_true(ext_out[3].id == 32, "expected id 32");
expect_true(ext_out[3].frame == 10, "expected frame 10");
expect_true(ext_out[3].len == 4, "expected len 4");
expect_true(0 == memcmp(ext_out[3].data, ext[1].data, 4), "expected data");
}
/* Parsing into a zero-capacity (NULL) output array must report
   OPUS_BUFFER_TOO_SMALL as soon as a real extension is found. */
void test_extensions_parse_zero(void)
{
   static const opus_extension_data ext[] = {
      {32, 1, (const unsigned char *)"DRED", 4},
   };
   unsigned char packet[32];
   int capacity = 0;
   int nbytes, status;

   nbytes = opus_packet_extensions_generate(packet, 32, ext, 1, 1);
   expect_true(nbytes == 32, "expected length 32");

   status = opus_packet_extensions_parse(packet, nbytes, NULL, &capacity);
   expect_true(status == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");
}
/* Corrupt a valid extension block in two different ways and check that the
   parser rejects it; then check that a too-small output array is reported. */
void test_extensions_parse_fail(void)
{
   static const opus_extension_data ext[] = {
      {2, 0, (const unsigned char *)"a", 1},
      {33, 1, (const unsigned char *)"NOT DRED", 8},
      {3, 4, (const unsigned char *)NULL, 0},
      {32, 10, (const unsigned char *)"DRED", 4}
   };
   opus_extension_data parsed[10];
   unsigned char buf[32];
   int capacity;
   int nbytes, status;

   /* Overwrite the length byte of the ID 33 extension so that it claims more
      payload than the block contains. */
   nbytes = opus_packet_extensions_generate(buf, sizeof(buf), ext, 4, 0);
   buf[4] = 255;
   capacity = 10;
   status = opus_packet_extensions_parse(buf, nbytes, parsed, &capacity);
   expect_true(status == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");
   status = opus_packet_extensions_count(buf, nbytes);
   expect_true(status == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");

   /* Overwrite a frame increment so that the frame index goes out of range. */
   capacity = 10;
   nbytes = opus_packet_extensions_generate(buf, sizeof(buf), ext, 4, 0);
   buf[14] = 255;
   status = opus_packet_extensions_parse(buf, nbytes, parsed, &capacity);
   expect_true(status == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");
   /* opus_packet_extensions_count() never looks at the increment value, so it
      still reports all 4 extensions. */
   status = opus_packet_extensions_count(buf, nbytes);
   expect_true(status == 4, "expected opus_packet_extensions_count to return 4");

   /* Room for a single extension only, while the block holds four. */
   capacity = 1;
   nbytes = opus_packet_extensions_generate(buf, sizeof(buf), ext, 4, 0);
   status = opus_packet_extensions_parse(buf, nbytes, parsed, &capacity);
   expect_true(status == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");
}
#define NB_RANDOM_EXTENSIONS 100000000
#define MAX_EXTENSION_SIZE 200
#define MAX_NB_EXTENSIONS 100
/* Fuzz opus_packet_extensions_parse() with random payloads of 0 to
   MAX_EXTENSION_SIZE bytes and a random output capacity of 0 to
   MAX_NB_EXTENSIONS. Only three return codes are acceptable, and whatever was
   extracted must be sane even when parsing failed. */
void test_random_extensions_parse(void)
{
   int iter;
   for (iter=0;iter<NB_RANDOM_EXTENSIONS;iter++)
   {
      opus_extension_data parsed[MAX_NB_EXTENSIONS];
      unsigned char payload[MAX_EXTENSION_SIZE];
      int nb_ext;
      int len;
      int k;
      int status;

      len = fast_rand()%(MAX_EXTENSION_SIZE+1);
      for (k=0;k<len;k++)
         payload[k] = fast_rand()&0xFF;
      nb_ext = fast_rand()%(MAX_NB_EXTENSIONS+1);

      status = opus_packet_extensions_parse(payload, len, parsed, &nb_ext);
      expect_true(status == OPUS_OK || status == OPUS_BUFFER_TOO_SMALL || status == OPUS_INVALID_PACKET, "expected OPUS_OK, OPUS_BUFFER_TOO_SMALL or OPUS_INVALID_PACKET");

      /* nb_ext now holds the number of entries actually filled in. */
      for (k=0;k<nb_ext;k++)
      {
         const opus_extension_data *e = &parsed[k];
         expect_true(e->frame >= 0 && e->frame < 48, "expected frame between 0 and 47");
         expect_true(e->id >= 2 && e->id <= 127, "expected id between 2 and 127");
         expect_true(e->data >= payload && e->data+e->len <= payload+len, "expected data to be within packet");
      }
   }
}
void test_opus_repacketizer_out_range_impl(void)
{
OpusRepacketizer rp;
unsigned char packet[1024];
unsigned char packet_out[1024];
opus_int16 size[48];
const unsigned char *padding;
opus_int32 padding_len;
opus_extension_data ext_out[10];
int i;
int nb_ext;
int res, len;
int first_count = 0, second_count = 0;
static const opus_extension_data ext[] = {
{33, 0, (const unsigned char *)"abcdefg", 7},
{100, 0, (const unsigned char *)"uvwxyz", 6},
};
opus_repacketizer_init(&rp);
memset(packet, 0, sizeof(packet));
/* Hybrid Packet with 20 msec frames, Code 3 */
packet[0] = (15 << 3) | 3;
/* Code 3, padding bit set, 1 frame */
packet[1] = 1 << 6 | 1;
packet[2] = 0;
packet[3] = 0;
/* generate 2 extensions, id 33 and 100 */
len = opus_packet_extensions_generate(&packet[4], sizeof(packet)-4, ext, 2, 0);
/* update the padding length */
packet[2] = len;
/* concatenate 3 frames */
res = opus_repacketizer_cat(&rp, packet, 4+len);
/* for the middle frame, no padding, no extensions */
packet[1] = 1;
res = opus_repacketizer_cat(&rp, packet, 4);
/* switch back to extensions for the last frame extensions */
packet[1] = 1 << 6 | 1;
res = opus_repacketizer_cat(&rp, packet, 4+len);
expect_true(rp.nb_frames == 3, "Expected 3 frames");
res = opus_repacketizer_out_range_impl(&rp,
0, 3, /* begin, end */
packet_out, /* unsigned char *data */
sizeof(packet_out), /* opus_int32 maxlen */
0, /*int self_delimited */
0, /* int pad */
NULL, /* const opus_extension_data *extensions */
0 /* int nb_extensions */);
expect_true(res > 0, "expected valid packet length");
/* now verify that we have the expected extensions */
res = opus_packet_parse_impl(packet_out, res, 0, NULL, NULL, size,
NULL, NULL, &padding, &padding_len);
nb_ext = 10;
res = opus_packet_extensions_parse(padding, padding_len, ext_out, &nb_ext);
expect_true(nb_ext == 4, "Expected 4 extensions");
for (i = 0 ; i < nb_ext; i++)
{
if (ext_out[i].id == 33)
{
opus_test_assert(ext_out[i].len == ext[0].len);
opus_test_assert(0 == memcmp(ext_out[i].data, ext[0].data, ext[0].len));
first_count++;
}
else if (ext_out[i].id == 100)
{
opus_test_assert(ext_out[i].len == ext[1].len);
opus_test_assert(0 == memcmp(ext_out[i].data, ext[1].data, ext[1].len));
second_count++;
}
if (i < 2)
opus_test_assert(ext_out[i].frame == 0)
else
opus_test_assert(ext_out[i].frame == 2)
}
opus_test_assert(first_count == 2);
opus_test_assert(second_count == 2);
}
/* Test driver.

   Seed selection, in order of priority:
     1. the first command-line argument,
     2. the SEED environment variable,
     3. a value mixed from the current time and the process id.
   The chosen seed is stored in iseed and copied into the generator state
   (Rz, Rw) before any test runs, then printed so a run can be reproduced.
   Returns 0 after all tests have been called. */
int main(int argc, char **argv)
{
   char *env_seed = getenv("SEED");
   int env_used = 0;

   if (argc > 1)
   {
      iseed = atoi(argv[1]);
   }
   else if (env_seed)
   {
      iseed = atoi(env_seed);
      env_used = 1;
   }
   else
   {
      iseed = (opus_uint32)time(NULL)^(((opus_uint32)getpid()&65535)<<16);
   }

   /* Seed the generator state before the first fast_rand() call below. */
   Rz = iseed;
   Rw = Rz;

   fprintf(stderr,"Testing extensions. Random seed: %u (%.4X)\n", iseed, fast_rand() % 65535);
   if (env_used)
   {
      fprintf(stderr,"  Random seed set from the environment (SEED=%s).\n", env_seed);
   }

   /* generation */
   test_extensions_generate_success();
   test_extensions_generate_zero();
   test_extensions_generate_no_padding();
   test_extensions_generate_fail();

   /* parsing */
   test_extensions_parse_success();
   test_extensions_parse_zero();
   test_extensions_parse_fail();
   test_random_extensions_parse();

   /* repacketizer round trip */
   test_opus_repacketizer_out_range_impl();

   fprintf(stderr,"Tests completed successfully.\n");
   return 0;
}