- upgrade to opus 1.5
This commit is contained in:
parent
37471d56ff
commit
62d72fda5c
|
|
@ -0,0 +1,101 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "x86cpu.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
|
||||
|
||||
/* Like the "regular" xcorr_kernel(), but computes 8 results at a time.

   x    input vector, len samples are read.
   y    input vector; lag k (0..7) reads y[k] .. y[k+len-1].
   sum  output: sum[k] receives the sum over j of x[j]*y[j+k]. The previous
        contents of sum[] are not read.
   len  number of samples in each inner product.

   The main loop consumes 8 samples per iteration with unaligned loads. A
   remainder of 1..7 samples is handled with masked loads, so lanes beyond
   the remainder are loaded as zero and do not contribute. */
static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
{
    /* Declarations precede all statements so the function is valid C89. */
    __m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
    __m256 x0;
    int i;
    xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
    /* Compute 8 inner products using partial sums. */
    for (i=0;i<len-7;i+=8)
    {
        x0 = _mm256_loadu_ps(x+i);
        xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i  ), xsum0);
        xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
        xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
        xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
        xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
        xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
        xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
        xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
    }
    if (i != len) {
        /* Seven all-ones entries followed by zeros: starting the load at
           offset 7-(len-i) yields exactly len-i enabled lanes. */
        static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
        __m256i m;
        /* The table is const; keep the qualifier through the cast. */
        m = _mm256_loadu_si256((const __m256i*)(const void*)(mask + 7+i-len));
        x0 = _mm256_maskload_ps(x+i, m);
        xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i  , m), xsum0);
        xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
        xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
        xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
        xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
        xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
        xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
        xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
    }
    /* 8 horizontal adds. */
    /* Compute [0 4] [1 5] [2 6] [3 7] */
    xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
    xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
    xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
    xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
    /* Compute [0 1 4 5] [2 3 6 7] */
    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
    xsum1 = _mm256_hadd_ps(xsum2, xsum3);
    /* Compute [0 1 2 3 4 5 6 7] */
    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
    _mm256_storeu_ps(sum, xsum0);
}
|
||||
|
||||
/* For every lag in [0, max_pitch), stores in xcorr[lag] the correlation of
   _x against _y+lag over len samples.

   Lags are processed eight at a time by xcorr_kernel_avx(); the leftover
   lags (fewer than eight) are computed one by one with celt_inner_prod().
   max_pitch must be positive (checked with celt_assert). */
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
{
   int lag;
   celt_assert(max_pitch>0);
   /* Keeps arch referenced; presumably needed when celt_inner_prod() is a
      macro that drops its arch argument -- confirm against pitch.h. */
   (void)arch;
   lag = 0;
   while (lag<max_pitch-7)
   {
      xcorr_kernel_avx(_x, _y+lag, &xcorr[lag], len);
      lag += 8;
   }
   while (lag<max_pitch)
   {
      xcorr[lag] = celt_inner_prod(_x, _y+lag, len, arch);
      lag++;
   }
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* When _MSC_VER is defined, define __SSE__, __SSE2__ and __SSE4_1__ for each
   instruction set whose OPUS_X86_MAY_HAVE_* option is defined, unless the
   macro is already defined. */
#if defined(_MSC_VER)

# if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(__SSE__)
#  define __SSE__
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(__SSE2__)
#  define __SSE2__
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(__SSE4_1__)
#  define __SSE4_1__
# endif

#endif /* _MSC_VER */
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "arm/armcpu.h"
|
||||
#include "nnet.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
|
||||
|
||||
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
) = {
|
||||
compute_linear_c, /* default */
|
||||
compute_linear_c,
|
||||
compute_linear_c,
|
||||
MAY_HAVE_NEON(compute_linear), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
|
||||
|
||||
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
|
||||
float *output,
|
||||
const float *input,
|
||||
int N,
|
||||
int activation
|
||||
) = {
|
||||
compute_activation_c, /* default */
|
||||
compute_activation_c,
|
||||
compute_activation_c,
|
||||
MAY_HAVE_NEON(compute_activation), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
|
||||
};
|
||||
|
||||
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const Conv2dLayer *conv,
|
||||
float *out,
|
||||
float *mem,
|
||||
const float *in,
|
||||
int height,
|
||||
int hstride,
|
||||
int activation
|
||||
) = {
|
||||
compute_conv2d_c, /* default */
|
||||
compute_conv2d_c,
|
||||
compute_conv2d_c,
|
||||
MAY_HAVE_NEON(compute_conv2d), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/* Copyright (c) 2011-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DNN_ARM_H
|
||||
#define DNN_ARM_H
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
|
||||
|
||||
void compute_activation_neon(float *output, const float *input, int N, int activation);
|
||||
void compute_activation_dotprod(float *output, const float *input, int N, int activation);
|
||||
|
||||
void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
|
||||
/* Selects what compute_linear(linear, out, in, arch) expands to:
     1. DOTPROD presumed at build time: direct call to compute_linear_dotprod().
     2. NEON intrinsics presumed and DOTPROD not built: direct call to
        compute_linear_neon().
     3. RTCD enabled with DOTPROD or NEON built: indirect call through
        DNN_COMPUTE_LINEAR_IMPL, indexed by (arch & OPUS_ARCHMASK).
   In cases 1 and 2 arch is evaluated and discarded. When no case applies,
   nothing is defined here. */
#if defined(OPUS_ARM_PRESUME_DOTPROD)

# define OVERRIDE_COMPUTE_LINEAR
# define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))

#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)

# define OVERRIDE_COMPUTE_LINEAR
# define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))

#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))

extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(const LinearLayer *linear, float *out, const float *in);

# define OVERRIDE_COMPUTE_LINEAR
# define compute_linear(linear, out, in, arch) \
    ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))

#endif
|
||||
|
||||
/* Selects what compute_activation() and compute_conv2d() expand to:
     1. NEON presumed at build time: direct calls to the _neon variants, with
        arch evaluated and discarded.
     2. RTCD enabled with DOTPROD or NEON built: indirect calls through
        DNN_COMPUTE_ACTIVATION_IMPL / DNN_COMPUTE_CONV2D_IMPL, indexed by
        (arch & OPUS_ARCHMASK).
   When neither case applies, nothing is defined here. */
#if defined(OPUS_ARM_PRESUME_NEON)

# define OVERRIDE_COMPUTE_ACTIVATION
# define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))

# define OVERRIDE_COMPUTE_CONV2D
# define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))

#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))

extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(float *output, const float *input, int N, int activation);

# define OVERRIDE_COMPUTE_ACTIVATION
# define compute_activation(output, input, N, activation, arch) \
    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))

extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
         const Conv2dLayer *conv, float *out, float *mem, const float *in,
         int height, int hstride, int activation);

# define OVERRIDE_COMPUTE_CONV2D
# define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))

#endif
|
||||
|
||||
|
||||
#endif /* DNN_ARM_H */
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Refuse to build this file unless the compiler reports DOTPROD support. */
#if !defined(__ARM_FEATURE_DOTPROD)
#error nnet_dotprod.c is being compiled without DOTPROD enabled
#endif

/* RTCD_ARCH is set before including nnet_arch.h; presumably that header uses
   it to name the functions it defines -- confirm in nnet_arch.h. */
#define RTCD_ARCH dotprod

#include "nnet_arch.h"
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Refuse to build this file unless the compiler reports Neon support under
   either macro spelling. */
#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)
#error nnet_neon.c is being compiled without Neon enabled
#endif

/* RTCD_ARCH is set before including nnet_arch.h; presumably that header uses
   it to name the functions it defines -- confirm in nnet_arch.h. */
#define RTCD_ARCH neon

#include "nnet_arch.h"
|
||||
|
|
@ -0,0 +1,246 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "arch.h"
|
||||
#include "burg.h"
|
||||
|
||||
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
|
||||
#define SILK_MAX_ORDER_LPC 16
|
||||
#define FIND_LPC_COND_FAC 1e-5f
|
||||
|
||||
/* Returns the sum of squares of data[0 .. dataSize-1], accumulated in double.
   Samples are consumed four at a time, then one at a time for the rest. */
static double silk_energy_FLP(
    const float *data,
    int         dataSize
)
{
    const int unrolled_end = dataSize - 3;
    double    energy = 0.0;
    int       pos = 0;

    /* groups of four samples */
    while( pos < unrolled_end ) {
        const double s0 = data[ pos + 0 ];
        const double s1 = data[ pos + 1 ];
        const double s2 = data[ pos + 2 ];
        const double s3 = data[ pos + 3 ];

        energy += s0 * s0 + s1 * s1 + s2 * s2 + s3 * s3;
        pos += 4;
    }

    /* leftover samples */
    while( pos < dataSize ) {
        const double s = data[ pos ];

        energy += s * s;
        pos++;
    }

    assert( energy >= 0.0 );
    return energy;
}
|
||||
|
||||
/* Returns the inner product of data1[0 .. dataSize-1] and
   data2[0 .. dataSize-1], accumulated in double. Samples are consumed four
   at a time, then one at a time for the rest. */
static double silk_inner_product_FLP(
    const float *data1,
    const float *data2,
    int         dataSize
)
{
    const int unrolled_end = dataSize - 3;
    double    acc = 0.0;
    int       pos = 0;

    /* groups of four products */
    while( pos < unrolled_end ) {
        const double p0 = data1[ pos + 0 ] * (double)data2[ pos + 0 ];
        const double p1 = data1[ pos + 1 ] * (double)data2[ pos + 1 ];
        const double p2 = data1[ pos + 2 ] * (double)data2[ pos + 2 ];
        const double p3 = data1[ pos + 3 ] * (double)data2[ pos + 3 ];

        acc += p0 + p1 + p2 + p3;
        pos += 4;
    }

    /* leftover products */
    while( pos < dataSize ) {
        acc += data1[ pos ] * (double)data2[ pos ];
        pos++;
    }

    return acc;
}
|
||||
|
||||
|
||||
/* Compute reflection coefficients from input signal */
|
||||
float silk_burg_analysis( /* O returns residual energy */
|
||||
float A[], /* O prediction coefficients (length order) */
|
||||
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
|
||||
const float minInvGain, /* I minimum inverse prediction gain */
|
||||
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
|
||||
const int nb_subfr, /* I number of subframes stacked in x */
|
||||
const int D /* I order */
|
||||
)
|
||||
{
|
||||
int k, n, s, reached_max_gain;
|
||||
double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
|
||||
const float *x_ptr;
|
||||
double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
|
||||
double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
|
||||
double Af[ SILK_MAX_ORDER_LPC ];
|
||||
|
||||
assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
|
||||
|
||||
/* Compute autocorrelations, added over subframes */
|
||||
C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
|
||||
memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
for( n = 1; n < D + 1; n++ ) {
|
||||
C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
|
||||
}
|
||||
}
|
||||
memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
|
||||
|
||||
/* Initialize */
|
||||
CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
|
||||
invGain = 1.0f;
|
||||
reached_max_gain = 0;
|
||||
for( n = 0; n < D; n++ ) {
|
||||
/* Update first row of correlation matrix (without first element) */
|
||||
/* Update last row of correlation matrix (without last element, stored in reversed order) */
|
||||
/* Update C * Af */
|
||||
/* Update C * flipud(Af) (stored in reversed order) */
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
tmp1 = x_ptr[ n ];
|
||||
tmp2 = x_ptr[ subfr_length - n - 1 ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
|
||||
C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
|
||||
Atmp = Af[ k ];
|
||||
tmp1 += x_ptr[ n - k - 1 ] * Atmp;
|
||||
tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
|
||||
}
|
||||
for( k = 0; k <= n; k++ ) {
|
||||
CAf[ k ] -= tmp1 * x_ptr[ n - k ];
|
||||
CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
|
||||
}
|
||||
}
|
||||
tmp1 = C_first_row[ n ];
|
||||
tmp2 = C_last_row[ n ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
tmp1 += C_last_row[ n - k - 1 ] * Atmp;
|
||||
tmp2 += C_first_row[ n - k - 1 ] * Atmp;
|
||||
}
|
||||
CAf[ n + 1 ] = tmp1;
|
||||
CAb[ n + 1 ] = tmp2;
|
||||
|
||||
/* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
|
||||
num = CAb[ n + 1 ];
|
||||
nrg_b = CAb[ 0 ];
|
||||
nrg_f = CAf[ 0 ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
num += CAb[ n - k ] * Atmp;
|
||||
nrg_b += CAb[ k + 1 ] * Atmp;
|
||||
nrg_f += CAf[ k + 1 ] * Atmp;
|
||||
}
|
||||
assert( nrg_f > 0.0 );
|
||||
assert( nrg_b > 0.0 );
|
||||
|
||||
/* Calculate the next order reflection (parcor) coefficient */
|
||||
rc = -2.0 * num / ( nrg_f + nrg_b );
|
||||
assert( rc > -1.0 && rc < 1.0 );
|
||||
|
||||
/* Update inverse prediction gain */
|
||||
tmp1 = invGain * ( 1.0 - rc * rc );
|
||||
if( tmp1 <= minInvGain ) {
|
||||
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
|
||||
rc = sqrt( 1.0 - minInvGain / invGain );
|
||||
if( num > 0 ) {
|
||||
/* Ensure adjusted reflection coefficients has the original sign */
|
||||
rc = -rc;
|
||||
}
|
||||
invGain = minInvGain;
|
||||
reached_max_gain = 1;
|
||||
} else {
|
||||
invGain = tmp1;
|
||||
}
|
||||
|
||||
/* Update the AR coefficients */
|
||||
for( k = 0; k < (n + 1) >> 1; k++ ) {
|
||||
tmp1 = Af[ k ];
|
||||
tmp2 = Af[ n - k - 1 ];
|
||||
Af[ k ] = tmp1 + rc * tmp2;
|
||||
Af[ n - k - 1 ] = tmp2 + rc * tmp1;
|
||||
}
|
||||
Af[ n ] = rc;
|
||||
|
||||
if( reached_max_gain ) {
|
||||
/* Reached max prediction gain; set remaining coefficients to zero and exit loop */
|
||||
for( k = n + 1; k < D; k++ ) {
|
||||
Af[ k ] = 0.0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Update C * Af and C * Ab */
|
||||
for( k = 0; k <= n + 1; k++ ) {
|
||||
tmp1 = CAf[ k ];
|
||||
CAf[ k ] += rc * CAb[ n - k + 1 ];
|
||||
CAb[ n - k + 1 ] += rc * tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
if( reached_max_gain ) {
|
||||
/* Convert to float */
|
||||
for( k = 0; k < D; k++ ) {
|
||||
A[ k ] = (float)( -Af[ k ] );
|
||||
}
|
||||
/* Subtract energy of preceding samples from C0 */
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
C0 -= silk_energy_FLP( x + s * subfr_length, D );
|
||||
}
|
||||
/* Approximate residual energy */
|
||||
nrg_f = C0 * invGain;
|
||||
} else {
|
||||
/* Compute residual energy and store coefficients as float */
|
||||
nrg_f = CAf[ 0 ];
|
||||
tmp1 = 1.0;
|
||||
for( k = 0; k < D; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
nrg_f += CAf[ k + 1 ] * Atmp;
|
||||
tmp1 += Atmp * Atmp;
|
||||
A[ k ] = (float)(-Atmp);
|
||||
}
|
||||
nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
|
||||
}
|
||||
|
||||
/* Return residual energy */
|
||||
return MAX32(0, (float)nrg_f);
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef BURG_H
|
||||
#define BURG_H
|
||||
|
||||
|
||||
/* Computes order-D prediction coefficients for the stacked subframes in x,
   writes them to A[] and returns the residual energy.

   A             (out) prediction coefficients (length order)
   x             (in)  input signal, length: nb_subfr*(D+L_sub)
   minInvGain    (in)  minimum inverse prediction gain
   subfr_length  (in)  input signal subframe length (incl. D preceding samples)
   nb_subfr      (in)  number of subframes stacked in x
   D             (in)  order */
float silk_burg_analysis(
    float       A[],
    const float x[],
    const float minInvGain,
    const int   subfr_length,
    const int   nb_subfr,
    const int   D
);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "opus_defines.h"
|
||||
|
||||
#define LOG256 5.5451774445f
|
||||
/* Approximates log2(x) by working on the bit pattern of x.

   The exponent is taken from the bits above bit 23 (bias 127), the value is
   rescaled so the exponent is removed, and a cubic polynomial in
   (mantissa - 1.5) supplies the fractional part.
   Assumes float and int are both 32 bits wide with the float laid out as the
   shifts imply; intended for positive x -- TODO confirm callers never pass
   zero, negative or non-finite values. */
static OPUS_INLINE float log2_approx(float x)
{
   int integer;
   float frac;
   union {
      float f;
      int i;
   } in;
   in.f = x;
   integer = (in.i>>23)-127;
   /* Remove the exponent. integer is negative whenever x < 1, and
      left-shifting a negative int is undefined behaviour, so the shift and
      subtraction are carried out on unsigned values. */
   in.i = (int)((unsigned)in.i - ((unsigned)integer<<23));
   frac = in.f - 1.5f;
   frac = -0.41445418f + frac*(0.95909232f
          + frac*(-0.33951290f + frac*0.16541097f));
   return 1+integer+frac;
}
|
||||
|
||||
#define log_approx(x) (0.69315f*log2_approx(x))
|
||||
|
||||
static OPUS_INLINE float ulaw2lin(float u)
|
||||
{
|
||||
float s;
|
||||
float scale_1 = 32768.f/255.f;
|
||||
u = u - 128.f;
|
||||
s = u >= 0.f ? 1.f : -1.f;
|
||||
u = fabs(u);
|
||||
return s*scale_1*(exp(u/128.*LOG256)-1);
|
||||
}
|
||||
|
||||
static OPUS_INLINE int lin2ulaw(float x)
|
||||
{
|
||||
float u;
|
||||
float scale = 255.f/32768.f;
|
||||
int s = x >= 0 ? 1 : -1;
|
||||
x = fabs(x);
|
||||
u = (s*(128*log_approx(1+scale*x)/LOG256));
|
||||
u = 128 + u;
|
||||
if (u < 0) u = 0;
|
||||
if (u > 255) u = 255;
|
||||
return (int)floor(.5 + u);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "celt/entenc.h"
|
||||
#include "os_support.h"
|
||||
#include "dred_config.h"
|
||||
#include "dred_coding.h"
|
||||
|
||||
/* Return the quantizer level to use for chunk index i.
   The level starts at q0 and grows by dQ_table[dQ]/16 per chunk
   (computed as (slope*i + 8)/16 in integer arithmetic), saturating at qmax.
   dQ is used directly as an index into an 8-entry table. */
int compute_quantizer(int q0, int dQ, int qmax, int i) {
    static const int dQ_table[8] = {0, 2, 3, 4, 6, 8, 12, 16};
    const int slope = dQ_table[dQ];
    const int level = q0 + (slope*i + 8)/16;
    if (level > qmax) return qmax;
    return level;
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_CODING_H
|
||||
#define DRED_CODING_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "entcode.h"
|
||||
|
||||
int compute_quantizer(int q0, int dQ, int qmax, int i);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_CONFIG_H
|
||||
#define DRED_CONFIG_H
|
||||
|
||||
/* Change this once DRED gets an extension number assigned. */
|
||||
#define DRED_EXTENSION_ID 126
|
||||
|
||||
/* Remove these two completely once DRED gets an extension number assigned. */
|
||||
#define DRED_EXPERIMENTAL_VERSION 10
|
||||
#define DRED_EXPERIMENTAL_BYTES 2
|
||||
|
||||
|
||||
#define DRED_MIN_BYTES 8
|
||||
|
||||
/* These are in part duplicates of the values defined in dred_rdovae_constants.h. */
|
||||
#define DRED_SILK_ENCODER_DELAY (79+12-80)
|
||||
#define DRED_FRAME_SIZE 160
|
||||
#define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))
|
||||
#define DRED_MAX_DATA_SIZE 1000
|
||||
#define DRED_ENC_Q0 6
|
||||
#define DRED_ENC_Q1 15
|
||||
|
||||
/* Covers 1.04 second so we can cover one second, after the lookahead. */
|
||||
#define DRED_MAX_LATENTS 26
|
||||
#define DRED_NUM_REDUNDANCY_FRAMES (2*DRED_MAX_LATENTS)
|
||||
#define DRED_MAX_FRAMES (4*DRED_MAX_LATENTS)
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "os_support.h"
|
||||
#include "dred_decoder.h"
|
||||
#include "dred_coding.h"
|
||||
#include "celt/entdec.h"
|
||||
#include "celt/laplace.h"
|
||||
#include "dred_rdovae_stats_data.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
/* Decode `dim` quantized values from the range decoder and store them,
   rescaled by 256/scale[k], in x[].
   Entries with r[k] == 0 or p0[k] == 255 carry no coded symbol and decode
   to zero. A zero scale is treated as 1 to avoid dividing by zero. */
static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
    int k;
    for (k=0;k<dim;k++) {
        int q = 0;
        int divisor = scale[k];
        if (r[k] != 0 && p0[k] != 255) {
            q = ec_laplace_decode_p0(dec, p0[k]<<7, r[k]<<7);
        }
        if (divisor == 0) divisor = 1;
        x[k] = q*256.f/divisor;
    }
}
|
||||
|
||||
/* Entropy-decode a DRED payload into `dec`.
   bytes/num_bytes:     the coded payload.
   min_feature_frames:  bounds how many latent vectors are decoded (see loop bound below).
   dred_frame_offset:   added to the decoded offset to account for the DRED
                        position in a multiframe packet.
   On return dec->state, dec->latents, dec->dred_offset and dec->nb_latents are
   filled in and dec->process_stage is set to 1.
   Returns the number of latent vectors decoded. */
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset)
{
    ec_dec ec;
    int q_level;
    int i;
    int max_i;
    int offset;
    int q0;
    int dQ;
    int qmax;
    int state_qoffset;
    int extra_offset;

    /* since features are decoded in quadruples, it makes no sense to go with an uneven number of redundancy frames */
    celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);

    /* decode initial state and initialize RDOVAE decoder */
    ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
    q0 = ec_dec_uint(&ec, 16);
    dQ = ec_dec_uint(&ec, 8);
    if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
    else extra_offset = 0;
    /* Compute total offset, including DRED position in a multiframe packet. */
    dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
    qmax = 15;
    if (q0 < 14 && dQ > 0) {
        int nvals;
        int ft;
        int s;
        /* The distribution for the dQmax symbol is split evenly between zero
           (which implies qmax == 15) and larger values, with the probability of
           all larger values being uniform.
           This is equivalent to coding 1 bit to decide if the maximum is less than
           15 followed by a uint to decide the actual value if it is less than
           15, but combined into a single symbol. */
        nvals = 15 - (q0 + 1);
        ft = 2*nvals;
        s = ec_decode(&ec, ft);
        if (s >= nvals) {
            qmax = q0 + (s - nvals) + 1;
            ec_dec_update(&ec, s, s + 1, ft);
        }
        else {
            ec_dec_update(&ec, 0, nvals, ft);
        }
    }
    state_qoffset = q0*DRED_STATE_DIM;
    dred_decode_latents(
        &ec,
        dec->state,
        dred_state_quant_scales_q8 + state_qoffset,
        dred_state_r_q8 + state_qoffset,
        dred_state_p0_q8 + state_qoffset,
        DRED_STATE_DIM);

    /* The loop bound does not change while decoding, so evaluate it once. */
    max_i = IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2);

    /* decode newest to oldest and store oldest to newest */
    for (i = 0; i < max_i; i += 2)
    {
        /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
        if (8*num_bytes - ec_tell(&ec) <= 7)
            break;
        q_level = compute_quantizer(q0, dQ, qmax, i/2);
        offset = q_level*DRED_LATENT_DIM;
        dred_decode_latents(
            &ec,
            &dec->latents[(i/2)*DRED_LATENT_DIM],
            dred_latent_quant_scales_q8 + offset,
            dred_latent_r_q8 + offset,
            dred_latent_p0_q8 + offset,
            DRED_LATENT_DIM
            );
    }
    dec->process_stage = 1;
    dec->nb_latents = i/2;
    return i/2;
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_DECODER_H
|
||||
#define DRED_DECODER_H
|
||||
|
||||
#include "opus.h"
|
||||
#include "dred_config.h"
|
||||
#include "dred_rdovae.h"
|
||||
#include "entcode.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
struct OpusDRED {
|
||||
float fec_features[2*DRED_NUM_REDUNDANCY_FRAMES*DRED_NUM_FEATURES];
|
||||
float state[DRED_STATE_DIM];
|
||||
float latents[(DRED_NUM_REDUNDANCY_FRAMES/2)*DRED_LATENT_DIM];
|
||||
int nb_latents;
|
||||
int process_stage;
|
||||
int dred_offset;
|
||||
};
|
||||
|
||||
|
||||
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,363 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#if 0
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#include "dred_encoder.h"
|
||||
#include "dred_coding.h"
|
||||
#include "celt/entenc.h"
|
||||
|
||||
#include "dred_decoder.h"
|
||||
#include "float_cast.h"
|
||||
#include "os_support.h"
|
||||
#include "celt/laplace.h"
|
||||
#include "dred_rdovae_stats_data.h"
|
||||
|
||||
|
||||
/* Put the RDOVAE encoder state into its all-zero initial condition. */
static void DRED_rdovae_init_encoder(RDOVAEEncState *enc_state)
{
    memset(enc_state, 0, sizeof *enc_state);
}
|
||||
|
||||
/* Load the DRED encoder model (RDOVAE encoder and LPCNet encoder weights)
   from a weights blob.
   data/len: the serialized weights.
   Returns OPUS_OK and sets enc->loaded on success, OPUS_BAD_ARG otherwise. */
int dred_encoder_load_model(DREDEnc* enc, const void *data, int len)
{
    /* Start from NULL so a parse failure that never stores a list cannot
       leave us holding an indeterminate pointer. */
    WeightArray *list = NULL;
    int ret;
    parse_weights(&list, data, len);
    /* NOTE(review): assumes parse_weights() leaves *list NULL when it fails - confirm. */
    if (list == NULL) return OPUS_BAD_ARG;
    ret = init_rdovaeenc(&enc->model, list);
    opus_free(list);
    if (ret == 0) {
        ret = lpcnet_encoder_load_model(&enc->lpcnet_enc_state, data, len);
    }
    if (ret == 0) enc->loaded = 1;
    return (ret == 0) ? OPUS_OK : OPUS_BAD_ARG;
}
|
||||
|
||||
/* Reset the encoder's running state while keeping the loaded model and the
   configuration fields that precede DREDENC_RESET_START in DREDEnc. */
void dred_encoder_reset(DREDEnc* enc)
{
    char *reset_begin = (char*)&enc->DREDENC_RESET_START;

    /* Zero everything from DREDENC_RESET_START to the end of the struct. */
    OPUS_CLEAR(reset_begin, sizeof(DREDEnc) - (reset_begin - (char*)enc));
    enc->input_buffer_fill = DRED_SILK_ENCODER_DELAY;
    lpcnet_encoder_init(&enc->lpcnet_enc_state);
    DRED_rdovae_init_encoder(&enc->rdovae_enc);
}
|
||||
|
||||
/* Initialise a DRED encoder for the given sampling rate and channel count.
   Unless USE_WEIGHTS_FILE is defined, the built-in RDOVAE encoder weights are
   loaded and enc->loaded is set when that succeeds. The running state is then
   reset. */
void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels)
{
    enc->loaded = 0;
    enc->channels = channels;
    enc->Fs = Fs;
#ifndef USE_WEIGHTS_FILE
    if (init_rdovaeenc(&enc->model, rdovaeenc_arrays) == 0) {
        enc->loaded = 1;
    }
#endif
    dred_encoder_reset(enc);
}
|
||||
|
||||
/* Consume one double frame (2*DRED_FRAME_SIZE samples) from enc->input_buffer:
   compute LPCNet features for both halves, run the RDOVAE encoder on them and
   push the result at the front of the latents/state history buffers. */
static void dred_process_frame(DREDEnc *enc, int arch)
{
    float feature_buffer[2 * 36];
    float input_buffer[2*DRED_NUM_FEATURES] = {0};
    float *first_features = feature_buffer;
    float *second_features = feature_buffer + 36;
    int new_fill;

    celt_assert(enc->loaded);

    /* Make room at index 0 of both history buffers (oldest entry falls off the end). */
    OPUS_MOVE(enc->state_buffer + DRED_STATE_DIM, enc->state_buffer, (DRED_MAX_FRAMES - 1) * DRED_STATE_DIM);
    OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM);

    /* LPCNet features for each of the two frames. */
    lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, first_features, arch);
    lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, second_features, arch);

    /* Keep only the first DRED_NUM_FEATURES of each frame (discard LPC coefficients). */
    OPUS_COPY(input_buffer, first_features, DRED_NUM_FEATURES);
    OPUS_COPY(input_buffer + DRED_NUM_FEATURES, second_features, DRED_NUM_FEATURES);

    /* Run the RDOVAE encoder; output goes to the front of the history buffers. */
    dred_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer, arch);

    /* Count the new entry, saturating at DRED_NUM_REDUNDANCY_FRAMES. */
    new_fill = enc->latents_buffer_fill+1;
    enc->latents_buffer_fill = IMIN(new_fill, DRED_NUM_REDUNDANCY_FRAMES);
}
|
||||
|
||||
/* Run an IIR filter over `len` samples using a transposed direct-form II
   style state update.
   in/out: input and output sample arrays (each input sample is read before
           the matching output is written, so they may be the same array).
   b0:     feed-forward gain applied directly to the current input.
   b, a:   `order` feed-forward and feedback coefficients.
   mem:    filter state, updated in place; mem[order] is read but never
           written, so it must hold at least order+1 values. */
void filter_df2t(const float *in, float *out, int len, float b0, const float *b, const float *a, int order, float *mem)
{
    int n;
    for (n=0;n<len;n++) {
        int k;
        const float x = in[n];
        const float y = x*b0 + mem[0];
        const float neg_y = -y;
        /* Shift the state down by one while accumulating the new contributions. */
        for (k=0;k<order;k++) {
            mem[k] = mem[k+1] + b[k]*x + a[k]*neg_y;
        }
        out[n] = y;
    }
}
|
||||
|
||||
#define MAX_DOWNMIX_BUFFER (960*2)
|
||||
/* Downmix `in` to mono and resample it from enc->Fs to 16 kHz.
   in/in_len:   input samples per channel at enc->Fs (interleaved when stereo).
   out/out_len: output buffer and sample count at 16 kHz; the caller must
                ensure in_len*16000 == out_len*enc->Fs.
   The input is zero-stuffed by `up`, low-pass filtered with filter_df2t()
   using enc->resample_mem as state, then decimated by 3 where needed. */
static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float *out, int out_len)
{
    float downmix[MAX_DOWNMIX_BUFFER];
    int i;
    int up;
    celt_assert(enc->channels*in_len <= MAX_DOWNMIX_BUFFER);
    celt_assert(in_len * (opus_int32)16000 == out_len * enc->Fs);
    switch(enc->Fs) {
        case 8000:
            up = 2;
            break;
        case 12000:
            up = 4;
            break;
        case 16000:
            up = 1;
            break;
        case 24000:
            up = 2;
            break;
        case 48000:
            up = 1;
            break;
        default:
            celt_assert(0);
            /* Keep `up` defined even when assertions are compiled out. */
            up = 1;
            break;
    }
    /* The zero-stuffed signal occupies up*in_len entries of downmix[]. */
    celt_assert(up*in_len <= MAX_DOWNMIX_BUFFER);
    OPUS_CLEAR(downmix, up*in_len);
    /* Downmix to mono, scale by `up` to compensate for the zero stuffing, and
       place each sample on every up-th slot. */
    if (enc->channels == 1) {
        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(up*in[i]);
    } else {
        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(.5*up*(in[2*i]+in[2*i+1]));
    }
    if (enc->Fs == 16000) {
        OPUS_COPY(out, downmix, out_len);
    } else if (enc->Fs == 48000 || enc->Fs == 24000) {
        /* ellip(7, .2, 70, 7750/24000) */

        static const float filter_b[8] = { 0.005873358047f, 0.012980854831f, 0.014531340042f, 0.014531340042f, 0.012980854831f, 0.005873358047f, 0.004523418224f, 0.f};
        static const float filter_a[8] = {-3.878718597768f, 7.748834257468f, -9.653651699533f, 8.007342726666f, -4.379450178552f, 1.463182111810f, -0.231720677804f, 0.f};
        float b0 = 0.004523418224f;
        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
    } else if (enc->Fs == 12000) {
        /* ellip(7, .2, 70, 7750/24000) */
        /* NOTE(review): the design comment above is identical to the 48/24 kHz
           branch although the coefficients differ - confirm the parameters. */
        static const float filter_b[8] = {-0.001017101081f, 0.003673127243f, 0.001009165267f, 0.001009165267f, 0.003673127243f, -0.001017101081f, 0.002033596776f, 0.f};
        static const float filter_a[8] = {-4.930414411612f, 11.291643096504f, -15.322037343815f, 13.216403930898f, -7.220409219553f, 2.310550142771f, -0.334338618782f, 0.f};
        float b0 = 0.002033596776f;
        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
    } else if (enc->Fs == 8000) {
        /* ellip(7, .2, 70, 3900/8000) */
        static const float filter_b[8] = { 0.081670120929f, 0.180401598565f, 0.259391051971f, 0.259391051971f, 0.180401598565f, 0.081670120929f, 0.020109185709f, 0.f};
        static const float filter_a[8] = {-1.393651933659f, 2.609789872676f, -2.403541968806f, 2.056814957331f, -1.148908574570f, 0.473001413788f, -0.110359852412f, 0.f};
        float b0 = 0.020109185709f;
        /* Pure 2x upsampling: the filter writes straight to the output. */
        filter_df2t(downmix, out, out_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
    } else {
        celt_assert(0);
    }
}
|
||||
|
||||
/* Feed `frame_size` samples per channel (at enc->Fs) into the DRED encoder.
   The audio is converted to 16 kHz in pieces of at most 2*DRED_FRAME_SIZE
   samples and appended to enc->input_buffer; every time a full double frame
   is available it is processed and removed from the buffer.
   enc->dred_offset and enc->latent_offset are recomputed for this call;
   extra_delay is given in samples at enc->Fs. */
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch)
{
    int curr_offset16k;
    int remaining16k = frame_size * 16000 / enc->Fs;
    celt_assert(enc->loaded);
    curr_offset16k = 40 + extra_delay*16000/enc->Fs - enc->input_buffer_fill;
    /* Offset in units of 40 samples at 16 kHz (2.5 ms), rounded to nearest. */
    enc->dred_offset = (int)floor((curr_offset16k+20.f)/40.f);
    enc->latent_offset = 0;
    while (remaining16k > 0) {
        const int chunk16k = IMIN(2*DRED_FRAME_SIZE, remaining16k);
        const int chunk = chunk16k * enc->Fs / 16000;

        dred_convert_to_16k(enc, pcm, chunk, &enc->input_buffer[enc->input_buffer_fill], chunk16k);
        enc->input_buffer_fill += chunk16k;
        pcm += chunk;
        remaining16k -= chunk16k;

        if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)
        {
            /* Not read again in this function; kept as in the original. */
            curr_offset16k += 320;
            dred_process_frame(enc, arch);
            /* Drop the consumed double frame and move the leftover to the front. */
            enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
            OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
            /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */
            if (enc->dred_offset >= 6) {
                enc->latent_offset++;
            } else {
                enc->dred_offset += 8;
            }
        }
    }
}
|
||||
|
||||
/* Quantize and entropy-code `dim` values from x[].
   scale, dzone, r, p0: per-dimension 8-bit tables (quantization scale, dead
   zone width and the two parameters passed to the Laplace coder).
   Each value is scaled, shrunk towards zero by a tanh-shaped dead zone,
   rounded to the nearest integer and coded with ec_laplace_encode_p0().
   Dimensions with r == 0 or p0 == 255 are not coded at all. */
static void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint8 *scale, const opus_uint8 *dzone, const opus_uint8 *r, const opus_uint8 *p0, int dim, int arch) {
    int k;
    int sym[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float scaled[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float dz_width[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    float dz_gate[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
    const float eps = .1f;
    /* The work is kept in separate passes over temporary arrays so that the
       compiler can vectorize each pass and the vector tanh() can be used. */
    for (k=0;k<dim;k++) {
        dz_width[k] = dzone[k]*(1.f/256.f);
        scaled[k] = x[k]*scale[k]*(1.f/256.f);
        dz_gate[k] = scaled[k]/(dz_width[k]+eps);
    }
    compute_activation(dz_gate, dz_gate, dim, ACTIVATION_TANH, arch);
    for (k=0;k<dim;k++) {
        scaled[k] = scaled[k] - dz_width[k]*dz_gate[k];
        sym[k] = (int)floor(.5f+scaled[k]);
    }
    for (k=0;k<dim;k++) {
        if (r[k] != 0 && p0[k] != 255) {
            ec_laplace_encode_p0(enc, sym[k], p0[k]<<7, r[k]<<7);
        } else {
            /* Make the impossible actually impossible. */
            sym[k] = 0;
        }
    }
}
|
||||
|
||||
/* Return 1 if any of the 16 activity flags starting at index 8*offset equals 1,
   otherwise 0. */
static int dred_voice_active(const unsigned char *activity_mem, int offset) {
    const unsigned char *window = activity_mem + 8*offset;
    int found = 0;
    int k;
    for (k=0; k<16 && !found; k++) {
        found = (window[k] == 1);
    }
    return found;
}
|
||||
|
||||
/* Entropy-code the DRED payload for the current frame into buf.
   max_chunks:    upper bound on the number of latent chunks to code.
   max_bytes:     size of buf.
   q0, dQ, qmax:  quantizer parameters (see compute_quantizer()).
   activity_mem:  per-2.5 ms activity flags read via dred_voice_active().
   Returns the number of bytes written, or 0 when no DRED data is emitted
   (nothing fits, or only inactive audio would have been coded). */
int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch) {
    ec_enc ec_encoder;
    ec_enc ec_bak;
    int i;
    int max_i;
    int nb_bytes;
    int state_qoffset;
    int total_offset;
    int latent_offset = enc->latent_offset;
    int extra_dred_offset = 0;
    int delayed_dred = 0;
    int dred_encoded = 0;
    int prev_active = 0;

    /* Delaying new DRED data when just out of silence because we already have the
       main Opus payload for that frame. */
    if (activity_mem[0] && enc->last_extra_dred_offset>0) {
        latent_offset = enc->last_extra_dred_offset;
        delayed_dred = 1;
        enc->last_extra_dred_offset = 0;
    }
    /* Skip leading chunks without voice activity. */
    while (latent_offset < enc->latents_buffer_fill && !dred_voice_active(activity_mem, latent_offset)) {
        latent_offset++;
        extra_dred_offset++;
    }
    if (!delayed_dred) enc->last_extra_dred_offset = extra_dred_offset;

    /* entropy coding of state and latents */
    ec_enc_init(&ec_encoder, buf, max_bytes);
    ec_enc_uint(&ec_encoder, q0, 16);
    ec_enc_uint(&ec_encoder, dQ, 8);
    total_offset = 16 - (enc->dred_offset - extra_dred_offset*8);
    celt_assert(total_offset>=0);
    if (total_offset <= 31) {
        ec_enc_uint(&ec_encoder, 0, 2);
        ec_enc_uint(&ec_encoder, total_offset, 32);
    } else {
        /* Large offsets: flag, then the offset split into high part and low 5 bits. */
        ec_enc_uint(&ec_encoder, 1, 2);
        ec_enc_uint(&ec_encoder, total_offset>>5, 256);
        ec_enc_uint(&ec_encoder, total_offset&31, 32);
    }
    celt_assert(qmax >= q0);
    if (q0 < 14 && dQ > 0) {
        int nvals;
        int fl;
        int fh;
        /* If you want to use qmax == q0, you should have set dQ = 0. */
        celt_assert(qmax > q0);
        nvals = 15 - (q0 + 1);
        if (qmax >= 15) {
            fl = 0;
            fh = nvals;
        } else {
            fl = nvals + qmax - (q0 + 1);
            fh = nvals + qmax - q0;
        }
        ec_encode(&ec_encoder, fl, fh, 2*nvals);
    }
    state_qoffset = q0*DRED_STATE_DIM;
    dred_encode_latents(
        &ec_encoder,
        &enc->state_buffer[latent_offset*DRED_STATE_DIM],
        dred_state_quant_scales_q8 + state_qoffset,
        dred_state_dead_zone_q8 + state_qoffset,
        dred_state_r_q8 + state_qoffset,
        dred_state_p0_q8 + state_qoffset,
        DRED_STATE_DIM,
        arch);
    if (ec_tell(&ec_encoder) > 8*max_bytes) {
        return 0;
    }
    /* Snapshot of the coder at the last point worth keeping. */
    ec_bak = ec_encoder;
    max_i = IMIN(2*max_chunks, enc->latents_buffer_fill-latent_offset-1);
    for (i = 0; i < max_i; i += 2)
    {
        int active;
        const int q_level = compute_quantizer(q0, dQ, qmax, i/2);
        const int offset = q_level * DRED_LATENT_DIM;

        dred_encode_latents(
            &ec_encoder,
            enc->latents_buffer + (i+latent_offset) * DRED_LATENT_DIM,
            dred_latent_quant_scales_q8 + offset,
            dred_latent_dead_zone_q8 + offset,
            dred_latent_r_q8 + offset,
            dred_latent_p0_q8 + offset,
            DRED_LATENT_DIM,
            arch
            );
        if (ec_tell(&ec_encoder) > 8*max_bytes) {
            /* If we haven't been able to code one chunk, give up on DRED completely. */
            if (i==0) return 0;
            break;
        }
        /* Only commit chunks up to (and one past) the last active one. */
        active = dred_voice_active(activity_mem, i+latent_offset);
        if (active || prev_active) {
            ec_bak = ec_encoder;
            dred_encoded = i+2;
        }
        prev_active = active;
    }
    /* Avoid sending empty DRED packets. */
    if (dred_encoded==0 || (dred_encoded<=2 && extra_dred_offset)) return 0;
    /* Roll back to the last committed chunk and finalize the stream. */
    ec_encoder = ec_bak;

    nb_bytes = (ec_tell(&ec_encoder)+7)/8;
    ec_enc_shrink(&ec_encoder, nb_bytes);
    ec_enc_done(&ec_encoder);
    return nb_bytes;
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_ENCODER_H
|
||||
#define DRED_ENCODER_H
|
||||
|
||||
#include "lpcnet.h"
|
||||
#include "dred_config.h"
|
||||
#include "dred_rdovae.h"
|
||||
#include "entcode.h"
|
||||
#include "lpcnet_private.h"
|
||||
#include "dred_rdovae_enc.h"
|
||||
#include "dred_rdovae_enc_data.h"
|
||||
|
||||
#define RESAMPLING_ORDER 8
|
||||
|
||||
typedef struct {
|
||||
RDOVAEEnc model;
|
||||
LPCNetEncState lpcnet_enc_state;
|
||||
RDOVAEEncState rdovae_enc;
|
||||
int loaded;
|
||||
opus_int32 Fs;
|
||||
int channels;
|
||||
|
||||
#define DREDENC_RESET_START input_buffer
|
||||
float input_buffer[2*DRED_DFRAME_SIZE];
|
||||
int input_buffer_fill;
|
||||
int dred_offset;
|
||||
int latent_offset;
|
||||
int last_extra_dred_offset;
|
||||
float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
|
||||
int latents_buffer_fill;
|
||||
float state_buffer[DRED_MAX_FRAMES * DRED_STATE_DIM];
|
||||
float resample_mem[RESAMPLING_ORDER + 1];
|
||||
} DREDEnc;
|
||||
|
||||
int dred_encoder_load_model(DREDEnc* enc, const void *data, int len);
|
||||
void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels);
|
||||
void dred_encoder_reset(DREDEnc* enc);
|
||||
|
||||
void dred_deinit_encoder(DREDEnc *enc);
|
||||
|
||||
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch);
|
||||
|
||||
int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_RDOVAE_H
|
||||
#define DRED_RDOVAE_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
typedef struct RDOVAEDec RDOVAEDec;
|
||||
typedef struct RDOVAEEnc RDOVAEEnc;
|
||||
typedef struct RDOVAEDecStruct RDOVAEDecState;
|
||||
typedef struct RDOVAEEncStruct RDOVAEEncState;
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
|
||||
|
||||
|
||||
#ifndef DRED_RDOVAE_CONSTANTS_H
|
||||
#define DRED_RDOVAE_CONSTANTS_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
/* Model dimensions shared by the RDOVAE encoder and decoder.
   NOTE(review): this header is marked as auto generated from a checkpoint, so
   manual edits are presumably lost on regeneration. */
/* The decoder advances its output by 4*DRED_NUM_FEATURES floats per latent. */
#define DRED_NUM_FEATURES 20
|
||||
|
||||
/* Number of floats copied out of the padded latent vector by the encoder. */
#define DRED_LATENT_DIM 21
|
||||
|
||||
/* Number of floats copied out of the padded initial-state vector by the encoder. */
#define DRED_STATE_DIM 19
|
||||
|
||||
/* Size of the scratch buffer receiving the enc_zdense output. */
#define DRED_PADDED_LATENT_DIM 24
|
||||
|
||||
/* Size of the scratch buffer receiving the gdense2 output. */
#define DRED_PADDED_STATE_DIM 24
|
||||
|
||||
/* NOTE(review): the stats tables hold 16*DRED_LATENT_DIM and 16*DRED_STATE_DIM
   entries, presumably one row per quantization level - confirm. */
#define DRED_NUM_QUANTIZATION_LEVELS 16
|
||||
|
||||
#define DRED_MAX_RNN_NEURONS 96
|
||||
|
||||
#define DRED_MAX_CONV_INPUTS 1536
|
||||
|
||||
#define DRED_ENC_MAX_RNN_NEURONS 1536
|
||||
|
||||
#define DRED_ENC_MAX_CONV_INPUTS 1536
|
||||
|
||||
#define DRED_DEC_MAX_RNN_NEURONS 96
|
||||
|
||||
|
||||
#endif /* DRED_RDOVAE_CONSTANTS_H */
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "dred_rdovae_dec.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
#include "os_support.h"
|
||||
|
||||
/* Clears a conv1d history buffer the first time it is used.
 *
 * mem       history buffer, treated as `dilation` consecutive slots of `len` floats
 * len       number of floats per slot
 * dilation  number of slots to clear
 * init      in/out flag: the buffer is cleared only if *init is zero on entry;
 *           *init is always 1 on return
 */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    const int already_done = *init;
    int slot;

    *init = 1;
    if (already_done) return;

    for (slot = 0; slot < dilation; slot++) {
        OPUS_CLEAR(mem + slot*len, len);
    }
}
|
||||
|
||||
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
|
||||
{
|
||||
int i;
|
||||
RDOVAEDecState dec;
|
||||
memset(&dec, 0, sizeof(dec));
|
||||
dred_rdovae_dec_init_states(&dec, model, state, arch);
|
||||
for (i = 0; i < 2*nb_latents; i += 2)
|
||||
{
|
||||
dred_rdovae_decode_qframe(
|
||||
&dec,
|
||||
model,
|
||||
&features[2*i*DRED_NUM_FEATURES],
|
||||
&latents[(i/2)*DRED_LATENT_DIM],
|
||||
arch);
|
||||
}
|
||||
}
|
||||
|
||||
void dred_rdovae_dec_init_states(
|
||||
RDOVAEDecState *h, /* io: state buffer handle */
|
||||
const RDOVAEDec *model,
|
||||
const float *initial_state, /* i: initial state */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
|
||||
float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
|
||||
int counter=0;
|
||||
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
|
||||
OPUS_COPY(h->gru1_state, state_init, DEC_GRU1_STATE_SIZE);
|
||||
counter += DEC_GRU1_STATE_SIZE;
|
||||
OPUS_COPY(h->gru2_state, &state_init[counter], DEC_GRU2_STATE_SIZE);
|
||||
counter += DEC_GRU2_STATE_SIZE;
|
||||
OPUS_COPY(h->gru3_state, &state_init[counter], DEC_GRU3_STATE_SIZE);
|
||||
counter += DEC_GRU3_STATE_SIZE;
|
||||
OPUS_COPY(h->gru4_state, &state_init[counter], DEC_GRU4_STATE_SIZE);
|
||||
counter += DEC_GRU4_STATE_SIZE;
|
||||
OPUS_COPY(h->gru5_state, &state_init[counter], DEC_GRU5_STATE_SIZE);
|
||||
h->initialized = 0;
|
||||
}
|
||||
|
||||
|
||||
void dred_rdovae_decode_qframe(
|
||||
RDOVAEDecState *dec_state, /* io: state buffer handle */
|
||||
const RDOVAEDec *model,
|
||||
float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
|
||||
const float *input, /* i: latent vector */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
|
||||
+ DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
|
||||
int output_index = 0;
|
||||
|
||||
/* run encoder stack and concatenate output in buffer*/
|
||||
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_DENSE1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
|
||||
output_index += DEC_GRU1_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
|
||||
output_index += DEC_GRU2_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV2_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
|
||||
output_index += DEC_GRU3_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV3_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
|
||||
output_index += DEC_GRU4_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV4_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
|
||||
output_index += DEC_GRU5_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV5_OUT_SIZE;
|
||||
|
||||
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
|
||||
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_RDOVAE_DEC_H
|
||||
#define DRED_RDOVAE_DEC_H
|
||||
|
||||
#include "dred_rdovae.h"
|
||||
#include "dred_rdovae_dec_data.h"
|
||||
#include "dred_rdovae_stats_data.h"
|
||||
|
||||
/* Running state of the RDOVAE decoder (typedef'd as RDOVAEDecState). */
struct RDOVAEDecStruct {
|
||||
/* 0 until the first frame after dred_rdovae_dec_init_states() has been decoded. */
int initialized;
|
||||
/* GRU states: filled by dred_rdovae_dec_init_states(), then handed to
   compute_generic_gru() as the state buffer on every frame. */
float gru1_state[DEC_GRU1_STATE_SIZE];
|
||||
float gru2_state[DEC_GRU2_STATE_SIZE];
|
||||
float gru3_state[DEC_GRU3_STATE_SIZE];
|
||||
float gru4_state[DEC_GRU4_STATE_SIZE];
|
||||
float gru5_state[DEC_GRU5_STATE_SIZE];
|
||||
/* Conv history buffers handed to compute_generic_conv1d(). */
float conv1_state[DEC_CONV1_STATE_SIZE];
|
||||
float conv2_state[DEC_CONV2_STATE_SIZE];
|
||||
float conv3_state[DEC_CONV3_STATE_SIZE];
|
||||
float conv4_state[DEC_CONV4_STATE_SIZE];
|
||||
float conv5_state[DEC_CONV5_STATE_SIZE];
|
||||
};
|
||||
|
||||
/* Sets the five GRU states of h from initial_state and resets h->initialized to 0. */
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
|
||||
/* Decodes one latent vector z into one quadruple feature frame, updating h. */
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
|
||||
/* Decodes nb_latents latent vectors into features using a fresh, local decoder state. */
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,124 @@
|
|||
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
|
||||
|
||||
|
||||
#ifndef DRED_RDOVAE_DEC_DATA_H
|
||||
#define DRED_RDOVAE_DEC_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#include "dred_rdovae.h"
|
||||
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
|
||||
/* Decoder layer dimensions (header is marked as auto generated from the
   model checkpoint). The *_OUT_SIZE values size the concatenation buffer in
   dred_rdovae_decode_qframe(); the *_STATE_SIZE values size the arrays in
   struct RDOVAEDecStruct. */
#define DEC_DENSE1_OUT_SIZE 96
|
||||
|
||||
#define DEC_GLU1_OUT_SIZE 96
|
||||
|
||||
#define DEC_GLU2_OUT_SIZE 96
|
||||
|
||||
#define DEC_GLU3_OUT_SIZE 96
|
||||
|
||||
#define DEC_GLU4_OUT_SIZE 96
|
||||
|
||||
#define DEC_GLU5_OUT_SIZE 96
|
||||
|
||||
/* Equals 4*DRED_NUM_FEATURES, i.e. one quadruple feature frame. */
#define DEC_OUTPUT_OUT_SIZE 80
|
||||
|
||||
#define DEC_HIDDEN_INIT_OUT_SIZE 128
|
||||
|
||||
/* Equals the sum of the five DEC_GRUn_STATE_SIZE values. */
#define DEC_GRU_INIT_OUT_SIZE 480
|
||||
|
||||
#define DEC_GRU1_OUT_SIZE 96
|
||||
|
||||
#define DEC_GRU1_STATE_SIZE 96
|
||||
|
||||
#define DEC_GRU2_OUT_SIZE 96
|
||||
|
||||
#define DEC_GRU2_STATE_SIZE 96
|
||||
|
||||
#define DEC_GRU3_OUT_SIZE 96
|
||||
|
||||
#define DEC_GRU3_STATE_SIZE 96
|
||||
|
||||
#define DEC_GRU4_OUT_SIZE 96
|
||||
|
||||
#define DEC_GRU4_STATE_SIZE 96
|
||||
|
||||
#define DEC_GRU5_OUT_SIZE 96
|
||||
|
||||
#define DEC_GRU5_STATE_SIZE 96
|
||||
|
||||
/* Each DEC_CONVn_IN_SIZE equals the number of floats already in the
   concatenation buffer when conv layer n runs. */
#define DEC_CONV1_OUT_SIZE 32
|
||||
|
||||
#define DEC_CONV1_IN_SIZE 192
|
||||
|
||||
#define DEC_CONV1_STATE_SIZE (192 * (1))
|
||||
|
||||
#define DEC_CONV1_DELAY 0
|
||||
|
||||
#define DEC_CONV2_OUT_SIZE 32
|
||||
|
||||
#define DEC_CONV2_IN_SIZE 320
|
||||
|
||||
#define DEC_CONV2_STATE_SIZE (320 * (1))
|
||||
|
||||
#define DEC_CONV2_DELAY 0
|
||||
|
||||
#define DEC_CONV3_OUT_SIZE 32
|
||||
|
||||
#define DEC_CONV3_IN_SIZE 448
|
||||
|
||||
#define DEC_CONV3_STATE_SIZE (448 * (1))
|
||||
|
||||
#define DEC_CONV3_DELAY 0
|
||||
|
||||
#define DEC_CONV4_OUT_SIZE 32
|
||||
|
||||
#define DEC_CONV4_IN_SIZE 576
|
||||
|
||||
#define DEC_CONV4_STATE_SIZE (576 * (1))
|
||||
|
||||
#define DEC_CONV4_DELAY 0
|
||||
|
||||
#define DEC_CONV5_OUT_SIZE 32
|
||||
|
||||
#define DEC_CONV5_IN_SIZE 704
|
||||
|
||||
#define DEC_CONV5_STATE_SIZE (704 * (1))
|
||||
|
||||
#define DEC_CONV5_DELAY 0
|
||||
|
||||
/* Layers of the RDOVAE decoder model. Read-only during decoding: the decode
   functions take it as a const pointer. */
struct RDOVAEDec {
|
||||
/* First layer applied to the latent vector in dred_rdovae_decode_qframe(). */
LinearLayer dec_dense1;
|
||||
/* GLU layers applied to the state of the matching GRU. */
LinearLayer dec_glu1;
|
||||
LinearLayer dec_glu2;
|
||||
LinearLayer dec_glu3;
|
||||
LinearLayer dec_glu4;
|
||||
LinearLayer dec_glu5;
|
||||
/* Final layer producing the quadruple feature frame. */
LinearLayer dec_output;
|
||||
/* Used only by dred_rdovae_dec_init_states() to derive the GRU states. */
LinearLayer dec_hidden_init;
|
||||
LinearLayer dec_gru_init;
|
||||
/* Input / recurrent weights passed together to compute_generic_gru(). */
LinearLayer dec_gru1_input;
|
||||
LinearLayer dec_gru1_recurrent;
|
||||
LinearLayer dec_gru2_input;
|
||||
LinearLayer dec_gru2_recurrent;
|
||||
LinearLayer dec_gru3_input;
|
||||
LinearLayer dec_gru3_recurrent;
|
||||
LinearLayer dec_gru4_input;
|
||||
LinearLayer dec_gru4_recurrent;
|
||||
LinearLayer dec_gru5_input;
|
||||
LinearLayer dec_gru5_recurrent;
|
||||
/* Conv layers run through compute_generic_conv1d(). */
LinearLayer dec_conv1;
|
||||
LinearLayer dec_conv2;
|
||||
LinearLayer dec_conv3;
|
||||
LinearLayer dec_conv4;
|
||||
LinearLayer dec_conv5;
|
||||
};
|
||||
|
||||
/* NOTE(review): presumably fills `model` from the named weight arrays and
   returns a status code; the definition is not visible here - confirm. */
int init_rdovaedec(RDOVAEDec *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* DRED_RDOVAE_DEC_DATA_H */
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "dred_rdovae_enc.h"
|
||||
#include "os_support.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
/* Zeroes a conv history buffer on first use.
 *
 * The buffer `mem` is cleared as `dilation` back-to-back chunks of `len`
 * floats, but only when *init is zero on entry. *init is 1 on return in
 * every case.
 */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    if (*init == 0) {
        int chunk = 0;
        while (chunk < dilation) {
            OPUS_CLEAR(mem + chunk*len, len);
            chunk++;
        }
    }
    *init = 1;
}
|
||||
|
||||
void dred_rdovae_encode_dframe(
|
||||
RDOVAEEncState *enc_state, /* io: encoder state */
|
||||
const RDOVAEEnc *model,
|
||||
float *latents, /* o: latent vector */
|
||||
float *initial_state, /* o: initial state */
|
||||
const float *input, /* i: double feature frame (concatenated) */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float padded_latents[DRED_PADDED_LATENT_DIM];
|
||||
float padded_state[DRED_PADDED_STATE_DIM];
|
||||
float buffer[ENC_DENSE1_OUT_SIZE + ENC_GRU1_OUT_SIZE + ENC_GRU2_OUT_SIZE + ENC_GRU3_OUT_SIZE + ENC_GRU4_OUT_SIZE + ENC_GRU5_OUT_SIZE
|
||||
+ ENC_CONV1_OUT_SIZE + ENC_CONV2_OUT_SIZE + ENC_CONV3_OUT_SIZE + ENC_CONV4_OUT_SIZE + ENC_CONV5_OUT_SIZE];
|
||||
float state_hidden[GDENSE1_OUT_SIZE];
|
||||
int output_index = 0;
|
||||
|
||||
/* run encoder stack and concatenate output in buffer*/
|
||||
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_DENSE1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
|
||||
output_index += ENC_GRU1_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv1_state, output_index, 1, &enc_state->initialized);
|
||||
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
|
||||
output_index += ENC_GRU2_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv2_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV2_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
|
||||
output_index += ENC_GRU3_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv3_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV3_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
|
||||
output_index += ENC_GRU4_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv4_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV4_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
|
||||
output_index += ENC_GRU5_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv5_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV5_OUT_SIZE;
|
||||
|
||||
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
|
||||
OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);
|
||||
|
||||
/* next, calculate initial state */
|
||||
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
|
||||
OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_RDOVAE_ENC_H
|
||||
#define DRED_RDOVAE_ENC_H
|
||||
|
||||
#include "dred_rdovae.h"
|
||||
|
||||
#include "dred_rdovae_enc_data.h"
|
||||
|
||||
/* Running state of the RDOVAE encoder (typedef'd as RDOVAEEncState). */
struct RDOVAEEncStruct {
|
||||
/* 0 until the first frame has been encoded; gates clearing of conv history. */
int initialized;
|
||||
/* GRU states handed to compute_generic_gru() as the state buffer on every frame. */
float gru1_state[ENC_GRU1_STATE_SIZE];
|
||||
float gru2_state[ENC_GRU2_STATE_SIZE];
|
||||
float gru3_state[ENC_GRU3_STATE_SIZE];
|
||||
float gru4_state[ENC_GRU4_STATE_SIZE];
|
||||
float gru5_state[ENC_GRU5_STATE_SIZE];
|
||||
/* Conv history; conv1 runs without dilation. */
float conv1_state[ENC_CONV1_STATE_SIZE];
|
||||
/* conv2..conv5 run with dilation 2, so they hold two history slots each. */
float conv2_state[2*ENC_CONV2_STATE_SIZE];
|
||||
float conv3_state[2*ENC_CONV3_STATE_SIZE];
|
||||
float conv4_state[2*ENC_CONV4_STATE_SIZE];
|
||||
float conv5_state[2*ENC_CONV5_STATE_SIZE];
|
||||
};
|
||||
|
||||
/* Encodes one double feature frame: writes DRED_LATENT_DIM floats to latents
   and DRED_STATE_DIM floats to initial_state, updating enc_state. */
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
|
||||
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,109 @@
|
|||
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
|
||||
|
||||
|
||||
#ifndef DRED_RDOVAE_ENC_DATA_H
|
||||
#define DRED_RDOVAE_ENC_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#include "dred_rdovae.h"
|
||||
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
|
||||
/* Encoder layer dimensions (header is marked as auto generated from the
   model checkpoint). The *_OUT_SIZE values size the concatenation buffer in
   dred_rdovae_encode_dframe(); the *_STATE_SIZE values size the arrays in
   struct RDOVAEEncStruct. */
#define ENC_DENSE1_OUT_SIZE 64
|
||||
|
||||
/* Same value as DRED_PADDED_LATENT_DIM. */
#define ENC_ZDENSE_OUT_SIZE 24
|
||||
|
||||
#define GDENSE1_OUT_SIZE 128
|
||||
|
||||
/* Same value as DRED_PADDED_STATE_DIM. */
#define GDENSE2_OUT_SIZE 24
|
||||
|
||||
#define ENC_GRU1_OUT_SIZE 64
|
||||
|
||||
#define ENC_GRU1_STATE_SIZE 64
|
||||
|
||||
#define ENC_GRU2_OUT_SIZE 64
|
||||
|
||||
#define ENC_GRU2_STATE_SIZE 64
|
||||
|
||||
#define ENC_GRU3_OUT_SIZE 64
|
||||
|
||||
#define ENC_GRU3_STATE_SIZE 64
|
||||
|
||||
#define ENC_GRU4_OUT_SIZE 64
|
||||
|
||||
#define ENC_GRU4_STATE_SIZE 64
|
||||
|
||||
#define ENC_GRU5_OUT_SIZE 64
|
||||
|
||||
#define ENC_GRU5_STATE_SIZE 64
|
||||
|
||||
/* Each ENC_CONVn_IN_SIZE equals the number of floats already in the
   concatenation buffer when conv layer n runs. */
#define ENC_CONV1_OUT_SIZE 96
|
||||
|
||||
#define ENC_CONV1_IN_SIZE 128
|
||||
|
||||
#define ENC_CONV1_STATE_SIZE (128 * (1))
|
||||
|
||||
#define ENC_CONV1_DELAY 0
|
||||
|
||||
#define ENC_CONV2_OUT_SIZE 96
|
||||
|
||||
#define ENC_CONV2_IN_SIZE 288
|
||||
|
||||
#define ENC_CONV2_STATE_SIZE (288 * (1))
|
||||
|
||||
#define ENC_CONV2_DELAY 0
|
||||
|
||||
#define ENC_CONV3_OUT_SIZE 96
|
||||
|
||||
#define ENC_CONV3_IN_SIZE 448
|
||||
|
||||
#define ENC_CONV3_STATE_SIZE (448 * (1))
|
||||
|
||||
#define ENC_CONV3_DELAY 0
|
||||
|
||||
#define ENC_CONV4_OUT_SIZE 96
|
||||
|
||||
#define ENC_CONV4_IN_SIZE 608
|
||||
|
||||
#define ENC_CONV4_STATE_SIZE (608 * (1))
|
||||
|
||||
#define ENC_CONV4_DELAY 0
|
||||
|
||||
#define ENC_CONV5_OUT_SIZE 96
|
||||
|
||||
#define ENC_CONV5_IN_SIZE 768
|
||||
|
||||
#define ENC_CONV5_STATE_SIZE (768 * (1))
|
||||
|
||||
#define ENC_CONV5_DELAY 0
|
||||
|
||||
/* Layers of the RDOVAE encoder model. Read-only during encoding: the encode
   function takes it as a const pointer. */
struct RDOVAEEnc {
|
||||
/* First layer applied to the input feature frame. */
LinearLayer enc_dense1;
|
||||
/* Maps the concatenated layer outputs to the padded latent vector. */
LinearLayer enc_zdense;
|
||||
/* gdense1 then gdense2 map the concatenated outputs to the padded initial state. */
LinearLayer gdense1;
|
||||
LinearLayer gdense2;
|
||||
/* Input / recurrent weights passed together to compute_generic_gru(). */
LinearLayer enc_gru1_input;
|
||||
LinearLayer enc_gru1_recurrent;
|
||||
LinearLayer enc_gru2_input;
|
||||
LinearLayer enc_gru2_recurrent;
|
||||
LinearLayer enc_gru3_input;
|
||||
LinearLayer enc_gru3_recurrent;
|
||||
LinearLayer enc_gru4_input;
|
||||
LinearLayer enc_gru4_recurrent;
|
||||
LinearLayer enc_gru5_input;
|
||||
LinearLayer enc_gru5_recurrent;
|
||||
/* Conv layers; enc_conv1 runs undilated, enc_conv2..enc_conv5 with dilation 2. */
LinearLayer enc_conv1;
|
||||
LinearLayer enc_conv2;
|
||||
LinearLayer enc_conv3;
|
||||
LinearLayer enc_conv4;
|
||||
LinearLayer enc_conv5;
|
||||
};
|
||||
|
||||
/* NOTE(review): presumably fills `model` from the named weight arrays and
   returns a status code; the definition is not visible here - confirm. */
int init_rdovaeenc(RDOVAEEnc *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* DRED_RDOVAE_ENC_DATA_H */
|
||||
|
|
@ -0,0 +1,353 @@
|
|||
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "dred_rdovae_stats_data.h"
|
||||
|
||||
const opus_uint8 dred_latent_quant_scales_q8[336] = {
|
||||
255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 208, 219, 218,
|
||||
217, 216, 219, 218, 217, 222, 217, 219,
|
||||
214, 215, 218, 219, 219, 217, 217, 213,
|
||||
215, 214, 168, 187, 187, 186, 183, 189,
|
||||
187, 184, 192, 184, 189, 180, 181, 187,
|
||||
188, 189, 185, 184, 178, 181, 179, 134,
|
||||
160, 160, 159, 155, 163, 160, 157, 167,
|
||||
157, 163, 151, 152, 160, 162, 164, 158,
|
||||
157, 149, 152, 149, 106, 137, 138, 137,
|
||||
131, 141, 138, 133, 146, 135, 141, 127,
|
||||
129, 138, 139, 143, 136, 135, 124, 127,
|
||||
125, 82, 117, 118, 118, 111, 122, 119,
|
||||
113, 128, 115, 122, 108, 109, 119, 120,
|
||||
124, 117, 115, 104, 105, 104, 64, 101,
|
||||
102, 102, 95, 107, 103, 96, 114, 99,
|
||||
107, 91, 93, 102, 103, 108, 101, 99,
|
||||
87, 86, 87, 48, 81, 84, 87, 79,
|
||||
90, 87, 78, 87, 84, 90, 76, 78,
|
||||
87, 80, 69, 86, 84, 72, 58, 72,
|
||||
36, 70, 71, 76, 67, 87, 72, 67,
|
||||
78, 73, 74, 65, 67, 75, 69, 20,
|
||||
76, 74, 60, 46, 61, 26, 50, 63,
|
||||
66, 57, 31, 45, 53, 63, 65, 40,
|
||||
56, 57, 56, 34, 5, 67, 63, 50,
|
||||
21, 51, 17, 23, 31, 53, 48, 11,
|
||||
18, 34, 40, 56, 15, 47, 49, 34,
|
||||
12, 0, 58, 54, 42, 10, 43, 10,
|
||||
6, 7, 25, 42, 3, 6, 17, 9,
|
||||
48, 5, 41, 43, 19, 3, 1, 47,
|
||||
47, 35, 2, 36, 3, 6, 7, 11,
|
||||
35, 3, 5, 6, 8, 18, 4, 35,
|
||||
38, 7, 3, 1, 15, 16, 29, 0,
|
||||
31, 3, 5, 5, 5, 29, 2, 3,
|
||||
5, 6, 11, 3, 31, 33, 4, 2,
|
||||
1, 11, 10, 25, 0, 27, 2, 3,
|
||||
3, 2, 24, 1, 2, 4, 4, 6,
|
||||
2, 27, 29, 2, 1, 1, 7, 7,
|
||||
21, 0, 23, 2, 2, 2, 1, 21,
|
||||
1, 2, 3, 3, 2, 1, 24, 27,
|
||||
2, 1, 0, 6, 5, 18, 0, 20
|
||||
};
|
||||
|
||||
const opus_uint8 dred_latent_dead_zone_q8[336] = {
|
||||
1, 0, 10, 0, 0, 6, 11, 0,
|
||||
0, 0, 7, 0, 0, 0, 13, 0,
|
||||
4, 0, 0, 5, 0, 1, 0, 13,
|
||||
1, 0, 12, 15, 0, 8, 0, 12,
|
||||
0, 0, 3, 18, 0, 7, 0, 0,
|
||||
12, 0, 0, 7, 16, 5, 0, 17,
|
||||
18, 0, 25, 2, 17, 0, 1, 7,
|
||||
22, 4, 11, 3, 0, 18, 0, 0,
|
||||
17, 20, 9, 1, 24, 22, 5, 43,
|
||||
6, 22, 1, 2, 11, 28, 13, 14,
|
||||
7, 0, 26, 0, 0, 29, 24, 14,
|
||||
4, 31, 27, 11, 66, 11, 28, 2,
|
||||
4, 16, 34, 23, 19, 11, 0, 34,
|
||||
0, 1, 45, 29, 20, 6, 41, 33,
|
||||
17, 94, 16, 36, 4, 6, 21, 43,
|
||||
37, 24, 16, 0, 43, 0, 1, 70,
|
||||
35, 26, 9, 56, 41, 27, 133, 23,
|
||||
47, 5, 9, 28, 56, 56, 30, 21,
|
||||
0, 56, 1, 2, 107, 41, 37, 11,
|
||||
85, 48, 46, 168, 31, 59, 7, 11,
|
||||
39, 72, 255, 39, 28, 0, 84, 2,
|
||||
3, 160, 53, 51, 16, 255, 53, 75,
|
||||
231, 44, 81, 9, 14, 54, 255, 255,
|
||||
49, 38, 0, 255, 3, 12, 255, 255,
|
||||
81, 24, 255, 255, 124, 255, 71, 255,
|
||||
12, 20, 67, 255, 255, 70, 54, 0,
|
||||
255, 5, 27, 255, 255, 124, 37, 255,
|
||||
255, 220, 255, 104, 255, 15, 28, 255,
|
||||
255, 255, 96, 73, 2, 255, 8, 44,
|
||||
255, 255, 255, 53, 255, 255, 255, 255,
|
||||
158, 255, 19, 37, 255, 255, 255, 123,
|
||||
108, 3, 255, 11, 178, 255, 255, 255,
|
||||
87, 255, 255, 255, 255, 255, 255, 23,
|
||||
57, 255, 255, 255, 255, 255, 5, 255,
|
||||
14, 255, 255, 255, 255, 108, 255, 255,
|
||||
255, 255, 255, 255, 27, 65, 255, 255,
|
||||
255, 255, 255, 7, 255, 16, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 30, 75, 255, 255, 255, 255, 255,
|
||||
9, 255, 18, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 38, 96,
|
||||
255, 255, 255, 255, 255, 11, 255, 21
|
||||
};
|
||||
|
||||
const opus_uint8 dred_latent_r_q8[336] = {
|
||||
233, 94, 91, 112, 149, 65, 92, 118,
|
||||
55, 122, 82, 190, 175, 100, 80, 62,
|
||||
125, 130, 236, 90, 219, 228, 85, 75,
|
||||
96, 138, 50, 75, 107, 47, 107, 67,
|
||||
181, 165, 85, 64, 47, 109, 114, 233,
|
||||
72, 213, 222, 72, 58, 81, 125, 36,
|
||||
59, 97, 36, 92, 53, 171, 154, 70,
|
||||
49, 33, 92, 98, 229, 54, 207, 214,
|
||||
59, 43, 65, 109, 24, 43, 74, 27,
|
||||
76, 40, 160, 143, 56, 35, 21, 75,
|
||||
82, 224, 37, 199, 204, 45, 29, 51,
|
||||
93, 14, 29, 60, 19, 60, 29, 149,
|
||||
128, 42, 23, 12, 59, 66, 219, 24,
|
||||
190, 191, 32, 17, 38, 77, 8, 18,
|
||||
48, 13, 46, 20, 135, 113, 31, 14,
|
||||
6, 43, 50, 213, 15, 181, 176, 21,
|
||||
9, 26, 61, 4, 10, 38, 8, 34,
|
||||
14, 120, 98, 21, 7, 3, 30, 37,
|
||||
206, 9, 172, 155, 10, 4, 16, 45,
|
||||
2, 5, 29, 3, 22, 8, 101, 81,
|
||||
12, 2, 0, 18, 24, 198, 3, 160,
|
||||
135, 4, 2, 10, 32, 0, 2, 17,
|
||||
2, 15, 4, 85, 67, 6, 0, 0,
|
||||
10, 15, 189, 0, 148, 106, 0, 0,
|
||||
4, 21, 0, 0, 6, 0, 9, 0,
|
||||
68, 53, 1, 0, 0, 5, 7, 180,
|
||||
0, 133, 66, 0, 0, 1, 12, 0,
|
||||
0, 0, 0, 4, 0, 52, 41, 0,
|
||||
0, 0, 1, 2, 169, 0, 118, 32,
|
||||
0, 0, 0, 7, 0, 0, 0, 0,
|
||||
2, 0, 38, 31, 0, 0, 0, 1,
|
||||
1, 158, 0, 103, 0, 0, 0, 0,
|
||||
3, 0, 0, 0, 0, 0, 0, 26,
|
||||
23, 0, 0, 0, 0, 0, 146, 0,
|
||||
88, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 17, 15, 0, 0,
|
||||
0, 0, 0, 132, 0, 74, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 10, 9, 0, 0, 0, 0, 0,
|
||||
118, 0, 62, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 6, 5,
|
||||
0, 0, 0, 0, 0, 104, 0, 51
|
||||
};
|
||||
|
||||
const opus_uint8 dred_latent_p0_q8[336] = {
|
||||
12, 162, 137, 134, 107, 138, 118, 138,
|
||||
201, 114, 145, 66, 81, 143, 146, 159,
|
||||
122, 121, 20, 104, 37, 14, 171, 147,
|
||||
142, 118, 152, 130, 149, 209, 123, 157,
|
||||
75, 91, 153, 157, 168, 130, 128, 23,
|
||||
120, 43, 18, 184, 158, 152, 126, 167,
|
||||
144, 159, 220, 133, 169, 85, 102, 163,
|
||||
170, 179, 140, 137, 27, 139, 49, 22,
|
||||
197, 171, 163, 135, 183, 158, 167, 229,
|
||||
145, 182, 96, 113, 175, 183, 193, 150,
|
||||
147, 32, 159, 57, 27, 211, 184, 175,
|
||||
144, 199, 174, 180, 237, 158, 196, 107,
|
||||
122, 187, 198, 208, 161, 159, 37, 182,
|
||||
66, 35, 224, 198, 188, 155, 215, 190,
|
||||
194, 243, 172, 209, 115, 131, 199, 213,
|
||||
222, 174, 170, 43, 205, 75, 44, 235,
|
||||
212, 201, 166, 231, 206, 208, 248, 186,
|
||||
223, 122, 140, 211, 228, 237, 187, 183,
|
||||
50, 227, 84, 57, 246, 228, 216, 179,
|
||||
246, 223, 227, 253, 204, 237, 132, 153,
|
||||
226, 245, 255, 203, 198, 58, 251, 96,
|
||||
78, 252, 241, 228, 192, 255, 237, 239,
|
||||
254, 218, 248, 140, 164, 237, 255, 255,
|
||||
216, 212, 67, 255, 106, 106, 255, 255,
|
||||
242, 207, 255, 255, 250, 255, 234, 255,
|
||||
151, 177, 249, 255, 255, 232, 228, 76,
|
||||
255, 115, 152, 255, 255, 253, 223, 255,
|
||||
255, 255, 255, 246, 255, 163, 192, 255,
|
||||
255, 255, 245, 241, 87, 255, 126, 201,
|
||||
255, 255, 255, 235, 255, 255, 255, 255,
|
||||
253, 255, 175, 205, 255, 255, 255, 253,
|
||||
250, 98, 255, 137, 255, 255, 255, 255,
|
||||
248, 255, 255, 255, 255, 255, 255, 189,
|
||||
220, 255, 255, 255, 255, 255, 108, 255,
|
||||
148, 255, 255, 255, 255, 253, 255, 255,
|
||||
255, 255, 255, 255, 201, 230, 255, 255,
|
||||
255, 255, 255, 116, 255, 159, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 213, 238, 255, 255, 255, 255, 255,
|
||||
125, 255, 169, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 224, 244,
|
||||
255, 255, 255, 255, 255, 134, 255, 180
|
||||
};
|
||||
|
||||
const opus_uint8 dred_state_quant_scales_q8[304] = {
|
||||
255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 121, 255, 255, 255, 255, 255,
|
||||
255, 255, 71, 215, 215, 233, 215, 216,
|
||||
215, 216, 216, 215, 216, 114, 215, 217,
|
||||
227, 216, 216, 216, 216, 65, 181, 181,
|
||||
205, 181, 182, 181, 182, 183, 181, 183,
|
||||
102, 182, 184, 196, 182, 184, 184, 183,
|
||||
60, 153, 153, 175, 152, 154, 152, 155,
|
||||
155, 152, 155, 84, 153, 155, 166, 154,
|
||||
156, 156, 155, 54, 129, 128, 146, 127,
|
||||
131, 128, 131, 132, 128, 131, 61, 129,
|
||||
130, 140, 130, 133, 134, 131, 49, 109,
|
||||
108, 120, 107, 111, 108, 111, 113, 108,
|
||||
112, 43, 108, 110, 118, 110, 114, 115,
|
||||
110, 45, 93, 91, 97, 89, 95, 91,
|
||||
95, 98, 91, 96, 31, 91, 92, 98,
|
||||
93, 98, 100, 93, 42, 78, 77, 77,
|
||||
74, 81, 76, 81, 87, 77, 82, 1,
|
||||
77, 77, 82, 80, 87, 91, 78, 45,
|
||||
67, 65, 62, 62, 70, 64, 71, 79,
|
||||
65, 71, 0, 65, 64, 69, 69, 77,
|
||||
82, 66, 49, 58, 55, 49, 53, 63,
|
||||
55, 65, 79, 56, 62, 2, 55, 54,
|
||||
57, 60, 72, 77, 57, 92, 51, 47,
|
||||
40, 44, 57, 46, 60, 78, 47, 54,
|
||||
131, 47, 45, 48, 53, 66, 67, 49,
|
||||
189, 45, 41, 33, 37, 51, 39, 53,
|
||||
69, 41, 47, 188, 40, 38, 40, 47,
|
||||
59, 59, 42, 235, 40, 36, 27, 32,
|
||||
47, 34, 47, 62, 36, 41, 255, 34,
|
||||
32, 34, 42, 52, 52, 37, 255, 35,
|
||||
31, 23, 28, 41, 29, 41, 53, 31,
|
||||
37, 216, 28, 27, 29, 37, 46, 46,
|
||||
32, 213, 31, 27, 19, 24, 36, 25,
|
||||
36, 46, 27, 34, 181, 24, 23, 24,
|
||||
32, 40, 40, 28, 177, 27, 24, 15,
|
||||
21, 31, 21, 32, 40, 24, 42, 151,
|
||||
20, 19, 20, 28, 36, 36, 25, 146
|
||||
};
|
||||
|
||||
const opus_uint8 dred_state_dead_zone_q8[304] = {
|
||||
13, 16, 9, 6, 20, 10, 13, 35,
|
||||
13, 15, 255, 9, 11, 10, 23, 14,
|
||||
26, 43, 255, 12, 14, 8, 8, 18,
|
||||
8, 13, 30, 11, 15, 255, 7, 9,
|
||||
8, 19, 14, 24, 38, 255, 11, 12,
|
||||
7, 8, 17, 7, 13, 25, 9, 15,
|
||||
255, 6, 6, 6, 17, 14, 21, 32,
|
||||
255, 11, 11, 6, 9, 16, 5, 13,
|
||||
22, 8, 15, 255, 5, 5, 5, 14,
|
||||
15, 19, 27, 255, 11, 9, 6, 9,
|
||||
15, 4, 13, 19, 6, 15, 255, 4,
|
||||
3, 4, 12, 16, 17, 22, 255, 11,
|
||||
8, 4, 9, 15, 4, 13, 17, 5,
|
||||
16, 255, 3, 2, 3, 11, 17, 16,
|
||||
18, 255, 11, 7, 3, 10, 15, 3,
|
||||
14, 16, 4, 17, 255, 2, 2, 2,
|
||||
9, 18, 12, 14, 255, 11, 4, 3,
|
||||
8, 14, 3, 12, 18, 3, 17, 255,
|
||||
1, 2, 2, 8, 20, 7, 7, 255,
|
||||
11, 4, 2, 8, 13, 2, 12, 15,
|
||||
2, 18, 255, 1, 2, 1, 8, 18,
|
||||
0, 1, 255, 13, 4, 3, 11, 14,
|
||||
3, 11, 22, 3, 16, 2, 0, 3,
|
||||
2, 11, 0, 11, 0, 121, 12, 4,
|
||||
2, 11, 13, 4, 2, 0, 3, 20,
|
||||
0, 0, 4, 2, 9, 8, 14, 0,
|
||||
29, 9, 5, 0, 10, 9, 6, 1,
|
||||
1, 4, 26, 0, 1, 4, 2, 4,
|
||||
13, 14, 0, 4, 7, 7, 3, 15,
|
||||
9, 8, 10, 0, 9, 34, 0, 2,
|
||||
3, 4, 4, 14, 17, 0, 1, 13,
|
||||
5, 2, 22, 14, 9, 17, 4, 6,
|
||||
46, 0, 2, 3, 3, 7, 23, 24,
|
||||
0, 0, 19, 3, 4, 28, 21, 10,
|
||||
24, 7, 2, 75, 0, 3, 3, 4,
|
||||
9, 33, 32, 0, 1, 26, 7, 4,
|
||||
37, 30, 10, 34, 12, 5, 255, 0,
|
||||
3, 2, 1, 13, 50, 46, 0, 4
|
||||
};
|
||||
|
||||
const opus_uint8 dred_state_r_q8[304] = {
|
||||
207, 224, 253, 207, 197, 233, 190, 198,
|
||||
232, 180, 4, 245, 251, 254, 210, 173,
|
||||
169, 223, 22, 199, 218, 253, 199, 187,
|
||||
229, 181, 189, 227, 168, 3, 243, 251,
|
||||
253, 203, 162, 156, 218, 17, 190, 212,
|
||||
252, 190, 177, 224, 170, 178, 223, 156,
|
||||
1, 240, 250, 253, 194, 149, 142, 212,
|
||||
12, 181, 205, 252, 180, 165, 218, 158,
|
||||
167, 217, 143, 0, 237, 249, 253, 185,
|
||||
135, 128, 205, 7, 169, 196, 251, 169,
|
||||
152, 212, 144, 154, 210, 128, 0, 234,
|
||||
247, 252, 174, 120, 112, 197, 4, 158,
|
||||
187, 250, 157, 139, 205, 130, 141, 203,
|
||||
112, 0, 230, 246, 251, 162, 105, 98,
|
||||
188, 2, 145, 177, 249, 144, 124, 197,
|
||||
115, 127, 194, 97, 0, 226, 244, 250,
|
||||
149, 91, 85, 179, 1, 130, 165, 247,
|
||||
128, 109, 187, 100, 115, 183, 79, 0,
|
||||
220, 241, 249, 136, 78, 77, 166, 2,
|
||||
116, 152, 245, 113, 95, 177, 86, 106,
|
||||
173, 64, 0, 214, 239, 248, 122, 67,
|
||||
71, 155, 11, 103, 139, 242, 99, 84,
|
||||
166, 78, 107, 161, 50, 0, 208, 235,
|
||||
246, 109, 63, 61, 143, 90, 90, 126,
|
||||
239, 82, 74, 154, 70, 107, 149, 37,
|
||||
19, 200, 232, 244, 98, 53, 48, 131,
|
||||
166, 77, 112, 235, 68, 64, 140, 60,
|
||||
96, 136, 25, 104, 191, 227, 242, 88,
|
||||
43, 37, 120, 183, 66, 101, 231, 56,
|
||||
56, 127, 48, 86, 124, 17, 132, 182,
|
||||
222, 239, 78, 32, 28, 110, 188, 52,
|
||||
87, 226, 46, 42, 112, 36, 71, 111,
|
||||
10, 117, 171, 216, 236, 64, 22, 18,
|
||||
99, 178, 39, 74, 220, 37, 30, 97,
|
||||
25, 57, 99, 7, 100, 160, 210, 233,
|
||||
51, 15, 10, 89, 164, 27, 60, 213,
|
||||
30, 19, 81, 16, 43, 84, 5, 83,
|
||||
147, 202, 229, 38, 9, 5, 79, 150
|
||||
};
|
||||
|
||||
const opus_uint8 dred_state_p0_q8[304] = {
|
||||
40, 24, 1, 35, 45, 15, 47, 44,
|
||||
15, 58, 252, 7, 2, 1, 25, 56,
|
||||
53, 17, 230, 45, 28, 2, 41, 50,
|
||||
18, 54, 49, 17, 65, 253, 8, 3,
|
||||
1, 28, 64, 61, 21, 235, 52, 32,
|
||||
2, 48, 56, 21, 62, 54, 20, 73,
|
||||
255, 9, 3, 1, 33, 73, 69, 25,
|
||||
240, 59, 37, 2, 56, 64, 24, 70,
|
||||
60, 23, 82, 255, 11, 4, 2, 39,
|
||||
83, 78, 31, 246, 67, 43, 2, 65,
|
||||
72, 29, 79, 67, 27, 92, 255, 13,
|
||||
4, 2, 45, 93, 88, 39, 250, 75,
|
||||
49, 3, 75, 81, 33, 89, 74, 31,
|
||||
102, 255, 15, 5, 2, 52, 105, 98,
|
||||
45, 252, 84, 56, 4, 85, 90, 39,
|
||||
99, 82, 35, 112, 255, 18, 6, 3,
|
||||
60, 116, 109, 52, 254, 95, 63, 5,
|
||||
97, 101, 45, 110, 90, 40, 125, 255,
|
||||
21, 7, 4, 70, 128, 116, 58, 251,
|
||||
105, 72, 6, 109, 110, 52, 119, 95,
|
||||
46, 136, 255, 24, 9, 4, 79, 135,
|
||||
121, 65, 235, 115, 80, 7, 124, 118,
|
||||
60, 126, 97, 53, 146, 255, 29, 11,
|
||||
5, 89, 131, 131, 74, 129, 124, 90,
|
||||
9, 139, 125, 69, 127, 91, 61, 160,
|
||||
189, 33, 13, 6, 98, 142, 145, 84,
|
||||
50, 132, 100, 11, 153, 132, 78, 136,
|
||||
99, 70, 176, 93, 39, 15, 7, 106,
|
||||
155, 159, 94, 39, 139, 110, 13, 168,
|
||||
139, 88, 150, 107, 78, 193, 72, 46,
|
||||
17, 8, 114, 168, 172, 105, 36, 153,
|
||||
119, 16, 183, 155, 98, 167, 121, 88,
|
||||
209, 83, 53, 21, 10, 128, 185, 188,
|
||||
116, 43, 167, 128, 19, 197, 171, 108,
|
||||
183, 135, 96, 226, 96, 60, 24, 12,
|
||||
142, 201, 204, 128, 51, 182, 142, 23,
|
||||
210, 188, 119, 200, 151, 109, 251, 110,
|
||||
69, 29, 14, 157, 218, 220, 139, 60
|
||||
};
|
||||
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
/* Auto generated from checkpoint rdovae_sparse5m_32.pth */
|
||||
|
||||
|
||||
#ifndef DRED_RDOVAE_STATS_DATA_H
|
||||
#define DRED_RDOVAE_STATS_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
|
||||
/* Byte-valued statistics tables for the DRED latent vector. The four arrays
   are parallel (336 entries each) and are defined in the matching generated
   .c file.
   NOTE(review): the _q8 suffix presumably means Q8 fixed point -- confirm
   against the script that generates these tables. */
extern const opus_uint8 dred_latent_quant_scales_q8[336];
|
||||
extern const opus_uint8 dred_latent_dead_zone_q8[336];
|
||||
extern const opus_uint8 dred_latent_r_q8[336];
|
||||
extern const opus_uint8 dred_latent_p0_q8[336];
|
||||
|
||||
|
||||
/* Same four kinds of table for the DRED state vector (304 entries each). */
extern const opus_uint8 dred_state_quant_scales_q8[304];
|
||||
extern const opus_uint8 dred_state_dead_zone_q8[304];
|
||||
extern const opus_uint8 dred_state_r_q8[304];
|
||||
extern const opus_uint8 dred_state_p0_q8[304];
|
||||
|
||||
|
||||
#endif /* DRED_RDOVAE_STATS_DATA_H */
|
||||
|
|
@ -0,0 +1,280 @@
|
|||
/* Copyright (c) 2017-2018 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "kiss_fft.h"
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include "freq.h"
|
||||
#include "pitch.h"
|
||||
#include "arch.h"
|
||||
#include <assert.h>
|
||||
#include "lpcnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
#include "os_support.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
|
||||
/* Runs a second-order recursive filter over N samples of x and stores the
   result in y. b[0..1] weight the input sample and a[0..1] weight the output
   sample when the two-element state mem is updated; mem is carried across
   calls. Each input is read before the matching output is written, so y may
   be the same buffer as x. */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = in + mem[0];
    /* State updates are evaluated in double and rounded to float on store. */
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    y[n] = out;
    n++;
  }
}
|
||||
|
||||
/* Draws one value from rand() and maps it linearly onto [-0.5, 0.5]. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return unit - .5;
}
|
||||
|
||||
/* Fills a[0..1] and then b[0..1] with independent random coefficients, each
   equal to .75 times a uni_rand() draw. The draw order (a first, then b) is
   kept so a given rand() seed yields the same coefficients. */
static void rand_resp(float *a, float *b) {
  int k;
  for (k=0;k<2;k++) a[k] = .75*uni_rand();
  for (k=0;k<2;k++) b[k] = .75*uni_rand();
}
|
||||
|
||||
void compute_noise(int *noise, float noise_std) {
|
||||
int i;
|
||||
for (i=0;i<FRAME_SIZE;i++) {
|
||||
noise[i] = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
|
||||
}
|
||||
}
|
||||
|
||||
static opus_int16 float2short(float x)
|
||||
{
|
||||
int i;
|
||||
i = (int)floor(.5+x);
|
||||
return IMAX(-32767, IMIN(32767, i));
|
||||
}
|
||||
|
||||
|
||||
/* Writes one frame of training audio to file as FRAME_SIZE interleaved pairs
   of 16-bit samples: (signal in, signal out).
     st    - encoder state; st->features supplies the LPC coefficients and
             st->sig_mem holds the last LPC_ORDER reconstructed samples
             (updated here).
     pcm   - the FRAME_SIZE target samples ("signal out").
     noise - FRAME_SIZE integer offsets added to the mu-law excitation to
             simulate quantisation error.
     file  - destination stream.
   A failed write terminates the program: a silently truncated training file
   is worse than an early exit. */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  int i;
  opus_int16 data[2*FRAME_SIZE];
  for (i=0;i<FRAME_SIZE;i++) {
    float p=0;
    float e;
    int j;
    /* Linear prediction from the previously reconstructed samples. */
    for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j];
    e = lin2ulaw(pcm[i] - p);
    /* Signal in. */
    data[2*i] = float2short(st->sig_mem[0]);
    /* Signal out. */
    data[2*i+1] = pcm[i];
    /* Simulate error on excitation. */
    e += noise[i];
    e = IMIN(255, IMAX(0, e));

    /* Shift the history by one and store the newly reconstructed sample. */
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = p + ulaw2lin(e);
  }
  /* sizeof(data) replaces the hand-computed 4*FRAME_SIZE byte count. */
  if (fwrite(data, sizeof(data), 1, file) != 1) {
    fprintf(stderr, "error writing PCM output\n");
    exit(1);
  }
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i;
|
||||
char *argv0;
|
||||
int count=0;
|
||||
static const float a_hp[2] = {-1.99599, 0.99600};
|
||||
static const float b_hp[2] = {-2, 1};
|
||||
float a_sig[2] = {0};
|
||||
float b_sig[2] = {0};
|
||||
float mem_hp_x[2]={0};
|
||||
float mem_resp_x[2]={0};
|
||||
float mem_preemph=0;
|
||||
float x[FRAME_SIZE];
|
||||
int gain_change_count=0;
|
||||
FILE *f1;
|
||||
FILE *ffeat;
|
||||
FILE *fpcm=NULL;
|
||||
opus_int16 pcm[FRAME_SIZE]={0};
|
||||
int noisebuf[FRAME_SIZE]={0};
|
||||
opus_int16 tmp[FRAME_SIZE] = {0};
|
||||
float speech_gain=1;
|
||||
float old_speech_gain = 1;
|
||||
int one_pass_completed = 0;
|
||||
LPCNetEncState *st;
|
||||
float noise_std=0;
|
||||
int training = -1;
|
||||
int burg = 0;
|
||||
int pitch = 0;
|
||||
FILE *fnoise = NULL;
|
||||
float noise_gain = 0;
|
||||
long noise_size=0;
|
||||
int arch;
|
||||
srand(getpid());
|
||||
arch = opus_select_arch();
|
||||
st = lpcnet_encoder_create();
|
||||
argv0=argv[0];
|
||||
if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
|
||||
burg = 1;
|
||||
training = 1;
|
||||
}
|
||||
else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
|
||||
burg = 1;
|
||||
training = 0;
|
||||
}
|
||||
else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
|
||||
pitch = 1;
|
||||
training = 1;
|
||||
fnoise = fopen(argv[2], "rb");
|
||||
fseek(fnoise, 0, SEEK_END);
|
||||
noise_size = ftell(fnoise);
|
||||
fseek(fnoise, 0, SEEK_SET);
|
||||
argv++;
|
||||
}
|
||||
else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
|
||||
pitch = 1;
|
||||
training = 0;
|
||||
}
|
||||
else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
|
||||
else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
|
||||
if (training == -1) {
|
||||
fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
|
||||
fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
|
||||
return 1;
|
||||
}
|
||||
f1 = fopen(argv[2], "r");
|
||||
if (f1 == NULL) {
|
||||
fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
|
||||
exit(1);
|
||||
}
|
||||
ffeat = fopen(argv[3], "wb");
|
||||
if (ffeat == NULL) {
|
||||
fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
|
||||
exit(1);
|
||||
}
|
||||
if (training && !pitch) {
|
||||
fpcm = fopen(argv[4], "wb");
|
||||
if (fpcm == NULL) {
|
||||
fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
while (1) {
|
||||
size_t ret;
|
||||
ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
|
||||
if (feof(f1) || ret != FRAME_SIZE) {
|
||||
if (!training) break;
|
||||
rewind(f1);
|
||||
ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
|
||||
if (ret != FRAME_SIZE) {
|
||||
fprintf(stderr, "error reading\n");
|
||||
exit(1);
|
||||
}
|
||||
one_pass_completed = 1;
|
||||
}
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
|
||||
if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
|
||||
if (training && ++gain_change_count > 2821) {
|
||||
float tmp1, tmp2;
|
||||
speech_gain = pow(10., (-30+(rand()%40))/20.);
|
||||
if (rand()&1) speech_gain = -speech_gain;
|
||||
if (rand()%20==0) speech_gain *= .01;
|
||||
if (!pitch && rand()%100==0) speech_gain = 0;
|
||||
gain_change_count = 0;
|
||||
rand_resp(a_sig, b_sig);
|
||||
tmp1 = rand()/(float)RAND_MAX;
|
||||
tmp2 = rand()/(float)RAND_MAX;
|
||||
noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
|
||||
if (fnoise != NULL) {
|
||||
long pos;
|
||||
/* Randomize the fraction because rand() only gives us 31 bits. */
|
||||
float frac_pos = rand()/(float)RAND_MAX;
|
||||
pos = (long)(frac_pos*noise_size);
|
||||
/* 32-bit alignment. */
|
||||
pos = pos/4 * 4;
|
||||
if (pos > noise_size-500000) pos = noise_size-500000;
|
||||
noise_gain = pow(10., (-15+(rand()%40))/20.);
|
||||
if (rand()%10==0) noise_gain = 0;
|
||||
fseek(fnoise, pos, SEEK_SET);
|
||||
}
|
||||
}
|
||||
if (fnoise != NULL) {
|
||||
opus_int16 noise[FRAME_SIZE];
|
||||
ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
|
||||
}
|
||||
biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
|
||||
biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
|
||||
for (i=0;i<FRAME_SIZE;i++) {
|
||||
float g;
|
||||
float f = (float)i/FRAME_SIZE;
|
||||
g = f*speech_gain + (1-f)*old_speech_gain;
|
||||
x[i] *= g;
|
||||
}
|
||||
if (burg) {
|
||||
float ceps[2*NB_BANDS];
|
||||
burg_cepstral_analysis(ceps, x);
|
||||
fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
|
||||
}
|
||||
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
|
||||
/* PCM is delayed by 1/2 frame to make the features centered on the frames. */
|
||||
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
|
||||
compute_frame_features(st, x, arch);
|
||||
|
||||
if (fpcm) {
|
||||
compute_noise(noisebuf, noise_std);
|
||||
}
|
||||
|
||||
if (pitch) {
|
||||
signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
|
||||
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
|
||||
pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
|
||||
}
|
||||
for (i=0;i<PITCH_IF_FEATURES;i++) {
|
||||
pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
|
||||
}
|
||||
fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
|
||||
} else {
|
||||
fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
|
||||
}
|
||||
/*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
|
||||
if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
|
||||
/*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
|
||||
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
|
||||
old_speech_gain = speech_gain;
|
||||
count++;
|
||||
}
|
||||
fclose(f1);
|
||||
fclose(ffeat);
|
||||
if (fpcm) fclose(fpcm);
|
||||
lpcnet_encoder_destroy(st);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "fargan.h"
|
||||
#include "os_support.h"
|
||||
#include "freq.h"
|
||||
#include "fargan_data.h"
|
||||
#include "lpcnet.h"
|
||||
#include "pitch.h"
|
||||
#include "nnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#define FARGAN_FEATURES (NB_FEATURES)
|
||||
|
||||
static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
|
||||
{
|
||||
FARGAN *model;
|
||||
float dense_in[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
|
||||
float conv1_in[COND_NET_FCONV1_IN_SIZE];
|
||||
float fdense2_in[COND_NET_FCONV1_OUT_SIZE];
|
||||
model = &st->model;
|
||||
celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == model->cond_net_fdense1.nb_inputs);
|
||||
celt_assert(COND_NET_FCONV1_IN_SIZE == model->cond_net_fdense1.nb_outputs);
|
||||
celt_assert(COND_NET_FCONV1_OUT_SIZE == model->cond_net_fconv1.nb_outputs);
|
||||
OPUS_COPY(&dense_in[NB_FEATURES], &model->cond_net_pembed.float_weights[IMAX(0,IMIN(period-32, 223))*COND_NET_PEMBED_OUT_SIZE], COND_NET_PEMBED_OUT_SIZE);
|
||||
OPUS_COPY(dense_in, features, NB_FEATURES);
|
||||
|
||||
compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH, st->arch);
|
||||
compute_generic_conv1d(&model->cond_net_fconv1, fdense2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
|
||||
compute_generic_dense(&model->cond_net_fdense2, cond, fdense2_in, ACTIVATION_TANH, st->arch);
|
||||
}
|
||||
|
||||
static void fargan_deemphasis(float *pcm, float *deemph_mem) {
|
||||
int i;
|
||||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) {
|
||||
pcm[i] += FARGAN_DEEMPHASIS * *deemph_mem;
|
||||
*deemph_mem = pcm[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Synthesises one subframe (FARGAN_SUBFRAME_SIZE samples) into pcm.
     st     - synthesis state; must have been primed by fargan_cont().
     pcm    - output, FARGAN_SUBFRAME_SIZE de-emphasised samples.
     cond   - FARGAN_COND_SIZE conditioning values for this subframe.
     period - pitch period used to fetch the pitch prediction from
              st->pitch_buf. Values below 1 are treated as 1: a period of 0
              would otherwise let the read position run past the end of
              pitch_buf.
   Side effects: updates the conv/GRU states, shifts the new (pre-de-emphasis)
   samples into st->pitch_buf and updates st->deemph_mem. */
static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
{
  int i, pos;
  float fwc0_in[SIG_NET_INPUT_SIZE];
  float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float pred[FARGAN_SUBFRAME_SIZE+4];
  float prev[FARGAN_SUBFRAME_SIZE];
  float pitch_gate[4];
  float gain;
  float gain_1;
  /* Skip-connection input, laid out as
     [gru1 glu | gru2 glu | gru3 glu | fwc0 glu | gated pred | prev].
     Sized to exactly what is written below instead of a fixed 10000 floats
     (40 kB of stack). */
  float skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
  FARGAN *model;

  celt_assert(st->cont_initialized);
  model = &st->model;
  /* The skip dense layer must not read more inputs than skip_cat holds. */
  celt_assert(model->sig_net_skip_dense.nb_inputs <= (int)(sizeof(skip_cat)/sizeof(skip_cat[0])));

  /* Guard against a degenerate period (see header comment). */
  if (period < 1) period = 1;

  compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
  gain = exp(gain);
  gain_1 = 1.f/(1e-5f + gain);

  /* Pitch prediction: FARGAN_SUBFRAME_SIZE+4 gain-normalised samples read one
     period back (with 2 samples of margin), wrapping by one period when the
     end of the buffer is reached. */
  pos = PITCH_MAX_PERIOD-period-2;
  for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
    pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)]));
    pos++;
    if (pos == PITCH_MAX_PERIOD) pos -= period;
  }
  /* Previous subframe, gain-normalised and clipped to [-1, 1]. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN16(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i]));

  OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
  celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
  compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);

  /* Four gates, one per place where the pitch prediction is injected. */
  compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
  OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
  compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
  OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
  compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
  OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
  compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);

  OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
  compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);

  compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
  /* Undo the gain normalisation. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;

  /* The pitch buffer stores the signal before de-emphasis. */
  OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
  fargan_deemphasis(pcm, &st->deemph_mem);
}
|
||||
|
||||
/* Primes the synthesis state from existing audio so generation can continue
   from it.
     st        - state to prime; cont_initialized is set to 1.
     pcm0      - FARGAN_CONT_SAMPLES samples of past signal.
     features0 - 5 consecutive feature frames of NB_FEATURES values each.
   The five feature frames are run through the conditioning network, the
   pre-emphasised past signal is loaded into the pitch buffer, and one frame of
   subframes is run with its output discarded and replaced by the known
   signal. */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
{
  int k;
  int period=0;
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  float x0[FARGAN_CONT_SAMPLES];
  float dummy[FARGAN_SUBFRAME_SIZE];

  /* Pre-load features. After the loop st->last_period holds the period of the
     fourth frame and cond holds the conditioning of the fifth. */
  for (k=0;k<5;k++) {
    const float *features = features0 + k*NB_FEATURES;
    st->last_period = period;
    period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
    compute_fargan_cond(st, cond, features, period);
  }

  /* Pre-emphasis (inverse of the output de-emphasis); the first sample has no
     predecessor and is set to 0. */
  x0[0] = 0;
  for (k=1;k<FARGAN_CONT_SAMPLES;k++) x0[k] = pcm0[k] - FARGAN_DEEMPHASIS*pcm0[k-1];

  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], x0, FARGAN_FRAME_SIZE);
  st->cont_initialized = 1;

  k = 0;
  while (k < FARGAN_NB_SUBFRAMES) {
    run_fargan_subframe(st, dummy, &cond[k*FARGAN_COND_SIZE], st->last_period);
    /* Overwrite what was just synthesised with the actual signal. */
    OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], &x0[FARGAN_FRAME_SIZE+k*FARGAN_SUBFRAME_SIZE], FARGAN_SUBFRAME_SIZE);
    k++;
  }
  st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
}
|
||||
|
||||
|
||||
/* Zeroes the state, records the CPU architecture and, unless
   USE_WEIGHTS_FILE is defined, loads the built-in weights (fargan_arrays).
   With USE_WEIGHTS_FILE the model is left cleared for fargan_load_model(). */
void fargan_init(FARGANState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
  st->arch = opus_select_arch();
#ifndef USE_WEIGHTS_FILE
  ret = init_fargan(&st->model, fargan_arrays);
#endif
  celt_assert(ret == 0);
}
|
||||
|
||||
/* Loads model weights from a serialized blob.
     st   - state whose model is (re)initialised.
     data - blob to parse.
     len  - size of the blob in bytes.
   Returns 0 on success and -1 if the blob could not be parsed or the model
   could not be initialised from it. */
int fargan_load_model(FARGANState *st, const void *data, int len) {
  /* Start from NULL so a parser that bails out without producing a list is
     detected instead of passing an indeterminate pointer to init_fargan().
     NOTE(review): assumes parse_weights() leaves *list NULL (or untouched) on
     failure -- confirm against its implementation. */
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  if (list == NULL) return -1;
  ret = init_fargan(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
|
||||
|
||||
/* Synthesises one frame: computes the conditioning for the frame's features,
   then runs FARGAN_NB_SUBFRAMES subframes into consecutive
   FARGAN_SUBFRAME_SIZE slices of pcm. The subframes use the pitch period of
   the previous frame (st->last_period); the period derived from this frame is
   stored for the next call. */
static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
{
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  int period;
  int sf = 0;
  celt_assert(st->cont_initialized);

  period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
  compute_fargan_cond(st, cond, features, period);
  while (sf < FARGAN_NB_SUBFRAMES) {
    run_fargan_subframe(st, pcm + sf*FARGAN_SUBFRAME_SIZE, cond + sf*FARGAN_COND_SIZE, st->last_period);
    sf++;
  }
  st->last_period = period;
}
|
||||
|
||||
/* Public float entry point: synthesises one frame of FARGAN_NB_SUBFRAMES
   subframes into pcm from one frame of features. Thin wrapper around
   fargan_synthesize_impl(); the state must have been primed with
   fargan_cont() first. */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
|
||||
{
|
||||
fargan_synthesize_impl(st, pcm, features);
|
||||
}
|
||||
|
||||
/* 16-bit variant of fargan_synthesize(): synthesises one frame in float,
   scales it by 32768, saturates to [-32767, 32767] and rounds to the nearest
   integer.
     pcm - output, FARGAN_FRAME_SIZE samples.
   The conversion loop is bounded by FARGAN_FRAME_SIZE, the size of the local
   float buffer, rather than by a constant from another header, so it can
   never read past fpcm. */
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
{
  int i;
  float fpcm[FARGAN_FRAME_SIZE];
  fargan_synthesize(st, fpcm, features);
  for (i=0;i<FARGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FARGAN_H
|
||||
#define FARGAN_H
|
||||
|
||||
#include "freq.h"
|
||||
#include "fargan_data.h"
|
||||
#include "pitchdnn.h"
|
||||
|
||||
/* Number of past samples fargan_cont() takes in pcm0. */
#define FARGAN_CONT_SAMPLES 320
|
||||
/* A frame is synthesised as FARGAN_NB_SUBFRAMES subframes of
   FARGAN_SUBFRAME_SIZE samples each. */
#define FARGAN_NB_SUBFRAMES 4
|
||||
#define FARGAN_SUBFRAME_SIZE 40
|
||||
#define FARGAN_FRAME_SIZE (FARGAN_NB_SUBFRAMES*FARGAN_SUBFRAME_SIZE)
|
||||
/* Conditioning values per subframe: the conditioning network output is split
   evenly across the subframes. */
#define FARGAN_COND_SIZE (COND_NET_FDENSE2_OUT_SIZE/FARGAN_NB_SUBFRAMES)
|
||||
/* Coefficient of the output de-emphasis filter (and of the matching
   pre-emphasis applied in fargan_cont()). */
#define FARGAN_DEEMPHASIS 0.85f
|
||||
|
||||
/* Input size of the first signal-network convolution: conditioning, pitch
   prediction (subframe + 4 samples) and previous subframe. */
#define SIG_NET_INPUT_SIZE (FARGAN_COND_SIZE+2*FARGAN_SUBFRAME_SIZE+4)
|
||||
/* Size of the state buffer kept for that convolution (fwc0_mem). */
#define SIG_NET_FWC0_STATE_SIZE (2*SIG_NET_INPUT_SIZE)
|
||||
|
||||
/* Alias for the GRU1 output size. */
#define FARGAN_MAX_RNN_NEURONS SIG_NET_GRU1_OUT_SIZE
|
||||
/* Complete state of one FARGAN synthesis instance. Cleared and set up by
   fargan_init(); primed by fargan_cont() before any synthesis call. */
typedef struct {
|
||||
/* Layer weights, filled in by init_fargan(). */
FARGAN model;
|
||||
/* Value returned by opus_select_arch(); passed to every compute_* call. */
int arch;
|
||||
/* Set to 1 by fargan_cont(); synthesis asserts that it is non-zero. */
int cont_initialized;
|
||||
/* Last output sample of the de-emphasis filter. */
float deemph_mem;
|
||||
/* Most recent PITCH_MAX_PERIOD samples of the signal before de-emphasis;
   source of the pitch prediction. */
float pitch_buf[PITCH_MAX_PERIOD];
|
||||
/* State of the conditioning network's conv1d layer. */
float cond_conv1_state[COND_NET_FCONV1_STATE_SIZE];
|
||||
/* State of the signal network's first convolution. */
float fwc0_mem[SIG_NET_FWC0_STATE_SIZE];
|
||||
/* Recurrent states of the three GRU layers. */
float gru1_state[SIG_NET_GRU1_STATE_SIZE];
|
||||
float gru2_state[SIG_NET_GRU2_STATE_SIZE];
|
||||
float gru3_state[SIG_NET_GRU3_STATE_SIZE];
|
||||
/* Pitch period computed for the previous frame; used by the subframes of the
   current frame. */
int last_period;
|
||||
} FARGANState;
|
||||
|
||||
/* Clears the state, selects the CPU architecture and, unless
   USE_WEIGHTS_FILE is defined, loads the built-in weights. */
void fargan_init(FARGANState *st);
|
||||
/* Initialises the model from a serialized weight blob of len bytes.
   Returns 0 on success, -1 if the model could not be initialised. */
int fargan_load_model(FARGANState *st, const void *data, int len);
|
||||
|
||||
/* Primes the state from FARGAN_CONT_SAMPLES past samples (pcm0) and five
   feature frames (features0). Must be called before synthesis. */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0);
|
||||
|
||||
/* Synthesises one frame into pcm from one frame of features, as float
   samples or as saturated 16-bit samples respectively. */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features);
|
||||
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features);
|
||||
|
||||
|
||||
#endif /* FARGAN_H */
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,79 @@
|
|||
|
||||
#ifndef FARGAN_DATA_H
|
||||
#define FARGAN_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
/* Compile-time element counts for the FARGAN layers: output sizes, plus the
   state sizes of the stateful (conv1d / GRU) layers. fargan.h and fargan.c
   use them to dimension state arrays and scratch buffers.
   COND_NET_* belong to the conditioning network, SIG_NET_* to the signal
   network. */
#define COND_NET_PEMBED_OUT_SIZE 12
|
||||
|
||||
#define COND_NET_FDENSE1_OUT_SIZE 64
|
||||
|
||||
#define COND_NET_FCONV1_OUT_SIZE 128
|
||||
|
||||
#define COND_NET_FCONV1_IN_SIZE 64
|
||||
|
||||
#define COND_NET_FCONV1_STATE_SIZE (64 * (2))
|
||||
|
||||
#define COND_NET_FCONV1_DELAY 1
|
||||
|
||||
#define COND_NET_FDENSE2_OUT_SIZE 320
|
||||
|
||||
#define SIG_NET_COND_GAIN_DENSE_OUT_SIZE 1
|
||||
|
||||
#define SIG_NET_FWC0_CONV_OUT_SIZE 192
|
||||
|
||||
#define SIG_NET_FWC0_GLU_GATE_OUT_SIZE 192
|
||||
|
||||
#define SIG_NET_GRU1_OUT_SIZE 160
|
||||
|
||||
#define SIG_NET_GRU1_STATE_SIZE 160
|
||||
|
||||
#define SIG_NET_GRU2_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_GRU2_STATE_SIZE 128
|
||||
|
||||
#define SIG_NET_GRU3_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_GRU3_STATE_SIZE 128
|
||||
|
||||
#define SIG_NET_GRU1_GLU_GATE_OUT_SIZE 160
|
||||
|
||||
#define SIG_NET_GRU2_GLU_GATE_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_GRU3_GLU_GATE_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_SKIP_GLU_GATE_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_SKIP_DENSE_OUT_SIZE 128
|
||||
|
||||
#define SIG_NET_SIG_DENSE_OUT_OUT_SIZE 40
|
||||
|
||||
#define SIG_NET_GAIN_DENSE_OUT_OUT_SIZE 4
|
||||
|
||||
/* FARGAN model description: one LinearLayer per network layer. Field names
   follow the COND_NET_* and SIG_NET_* size macros defined in this header.
   NOTE(review): presumably filled in by init_fargan() from a WeightArray
   list -- confirm against the generated data file. */
typedef struct {
|
||||
/* Conditioning network layers. */
LinearLayer cond_net_pembed;
|
||||
LinearLayer cond_net_fdense1;
|
||||
LinearLayer cond_net_fconv1;
|
||||
LinearLayer cond_net_fdense2;
|
||||
/* Signal network layers. */
LinearLayer sig_net_cond_gain_dense;
|
||||
LinearLayer sig_net_fwc0_conv;
|
||||
LinearLayer sig_net_fwc0_glu_gate;
|
||||
/* Each GRU has separate input and recurrent weight layers. */
LinearLayer sig_net_gru1_input;
|
||||
LinearLayer sig_net_gru1_recurrent;
|
||||
LinearLayer sig_net_gru2_input;
|
||||
LinearLayer sig_net_gru2_recurrent;
|
||||
LinearLayer sig_net_gru3_input;
|
||||
LinearLayer sig_net_gru3_recurrent;
|
||||
LinearLayer sig_net_gru1_glu_gate;
|
||||
LinearLayer sig_net_gru2_glu_gate;
|
||||
LinearLayer sig_net_gru3_glu_gate;
|
||||
LinearLayer sig_net_skip_glu_gate;
|
||||
LinearLayer sig_net_skip_dense;
|
||||
LinearLayer sig_net_sig_dense_out;
|
||||
LinearLayer sig_net_gain_dense_out;
|
||||
} FARGAN;
|
||||
|
||||
int init_fargan(FARGAN *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* FARGAN_DATA_H */
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
/* Copyright (c) 2017-2018 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "kiss_fft.h"
|
||||
#include <math.h>
|
||||
#include "freq.h"
|
||||
#include "pitch.h"
|
||||
#include "arch.h"
|
||||
#include "burg.h"
|
||||
#include <assert.h>
|
||||
#include "os_support.h"
|
||||
|
||||
#define SQUARE(x) ((x)*(x))
|
||||
|
||||
static const opus_int16 eband5ms[] = {
|
||||
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40
|
||||
};
|
||||
|
||||
static const float compensation[] = {
|
||||
0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
|
||||
};
|
||||
|
||||
|
||||
extern const kiss_fft_state kfft;
|
||||
extern const float half_window[OVERLAP_SIZE];
|
||||
extern const float dct_table[NB_BANDS*NB_BANDS];
|
||||
|
||||
|
||||
static void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
|
||||
int i;
|
||||
float sum[NB_BANDS] = {0};
|
||||
for (i=0;i<NB_BANDS-1;i++)
|
||||
{
|
||||
int j;
|
||||
int band_size;
|
||||
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
|
||||
for (j=0;j<band_size;j++) {
|
||||
float tmp;
|
||||
float frac = (float)j/band_size;
|
||||
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
|
||||
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
|
||||
tmp = 1.f/(tmp + 1e-9);
|
||||
sum[i] += (1-frac)*tmp;
|
||||
sum[i+1] += frac*tmp;
|
||||
}
|
||||
}
|
||||
sum[0] *= 2;
|
||||
sum[NB_BANDS-1] *= 2;
|
||||
for (i=0;i<NB_BANDS;i++)
|
||||
{
|
||||
bandE[i] = sum[i];
|
||||
}
|
||||
}
|
||||
|
||||
static float lpcn_lpc(
|
||||
opus_val16 *lpc, /* out: [0...p-1] LPC coefficients */
|
||||
opus_val16 *rc,
|
||||
const opus_val32 *ac, /* in: [0...p] autocorrelation values */
|
||||
int p
|
||||
)
|
||||
{
|
||||
int i, j;
|
||||
opus_val32 r;
|
||||
opus_val32 error = ac[0];
|
||||
|
||||
OPUS_CLEAR(lpc, p);
|
||||
OPUS_CLEAR(rc, p);
|
||||
if (ac[0] != 0)
|
||||
{
|
||||
for (i = 0; i < p; i++) {
|
||||
/* Sum up this iteration's reflection coefficient */
|
||||
opus_val32 rr = 0;
|
||||
for (j = 0; j < i; j++)
|
||||
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
|
||||
rr += SHR32(ac[i + 1],3);
|
||||
r = -SHL32(rr,3)/error;
|
||||
rc[i] = r;
|
||||
/* Update LPC coefficients and total error */
|
||||
lpc[i] = SHR32(r,3);
|
||||
for (j = 0; j < (i+1)>>1; j++)
|
||||
{
|
||||
opus_val32 tmp1, tmp2;
|
||||
tmp1 = lpc[j];
|
||||
tmp2 = lpc[i-1-j];
|
||||
lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
|
||||
lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
|
||||
}
|
||||
|
||||
error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
|
||||
/* Bail out once we get 30 dB gain */
|
||||
if (error<.001f*ac[0])
|
||||
break;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
|
||||
int i;
|
||||
float sum[NB_BANDS] = {0};
|
||||
for (i=0;i<NB_BANDS-1;i++)
|
||||
{
|
||||
int j;
|
||||
int band_size;
|
||||
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
|
||||
for (j=0;j<band_size;j++) {
|
||||
float tmp;
|
||||
float frac = (float)j/band_size;
|
||||
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
|
||||
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
|
||||
sum[i] += (1-frac)*tmp;
|
||||
sum[i+1] += frac*tmp;
|
||||
}
|
||||
}
|
||||
sum[0] *= 2;
|
||||
sum[NB_BANDS-1] *= 2;
|
||||
for (i=0;i<NB_BANDS;i++)
|
||||
{
|
||||
bandE[i] = sum[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
|
||||
int i;
|
||||
float burg_in[FRAME_SIZE];
|
||||
float burg_lpc[LPC_ORDER];
|
||||
float x[WINDOW_SIZE];
|
||||
float Eburg[NB_BANDS];
|
||||
float g;
|
||||
kiss_fft_cpx LPC[FREQ_SIZE];
|
||||
float Ly[NB_BANDS];
|
||||
float logMax = -2;
|
||||
float follow = -2;
|
||||
assert(order <= LPC_ORDER);
|
||||
assert(len <= FRAME_SIZE);
|
||||
for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
|
||||
g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
|
||||
g /= len - 2*(order-1);
|
||||
OPUS_CLEAR(x, WINDOW_SIZE);
|
||||
x[0] = 1;
|
||||
for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
|
||||
forward_transform(LPC, x);
|
||||
compute_band_energy_inverse(Eburg, LPC);
|
||||
for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
Ly[i] = log10(1e-2+Eburg[i]);
|
||||
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
|
||||
logMax = MAX16(logMax, Ly[i]);
|
||||
follow = MAX16(follow-2.5, Ly[i]);
|
||||
}
|
||||
dct(burg_cepstrum, Ly);
|
||||
burg_cepstrum[0] += - 4;
|
||||
}
|
||||
|
||||
void burg_cepstral_analysis(float *ceps, const float *x) {
|
||||
int i;
|
||||
compute_burg_cepstrum(x, &ceps[0 ], FRAME_SIZE/2, LPC_ORDER);
|
||||
compute_burg_cepstrum(&x[FRAME_SIZE/2], &ceps[NB_BANDS], FRAME_SIZE/2, LPC_ORDER);
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
float c0, c1;
|
||||
c0 = ceps[i];
|
||||
c1 = ceps[NB_BANDS+i];
|
||||
ceps[i ] = .5*(c0+c1);
|
||||
ceps[NB_BANDS+i] = (c0-c1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void interp_band_gain(float *g, const float *bandE) {
|
||||
int i;
|
||||
memset(g, 0, FREQ_SIZE);
|
||||
for (i=0;i<NB_BANDS-1;i++)
|
||||
{
|
||||
int j;
|
||||
int band_size;
|
||||
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
|
||||
for (j=0;j<band_size;j++) {
|
||||
float frac = (float)j/band_size;
|
||||
g[(eband5ms[i]*WINDOW_SIZE_5MS) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void dct(float *out, const float *in) {
|
||||
int i;
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
int j;
|
||||
float sum = 0;
|
||||
for (j=0;j<NB_BANDS;j++) {
|
||||
sum += in[j] * dct_table[j*NB_BANDS + i];
|
||||
}
|
||||
out[i] = sum*sqrt(2./NB_BANDS);
|
||||
}
|
||||
}
|
||||
|
||||
static void idct(float *out, const float *in) {
|
||||
int i;
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
int j;
|
||||
float sum = 0;
|
||||
for (j=0;j<NB_BANDS;j++) {
|
||||
sum += in[j] * dct_table[i*NB_BANDS + j];
|
||||
}
|
||||
out[i] = sum*sqrt(2./NB_BANDS);
|
||||
}
|
||||
}
|
||||
|
||||
/* Transforms WINDOW_SIZE real samples with opus_fft() (imaginary parts set
   to zero) and copies the first FREQ_SIZE output bins to `out`. */
void forward_transform(kiss_fft_cpx *out, const float *in) {
  int n;
  kiss_fft_cpx time_buf[WINDOW_SIZE];
  kiss_fft_cpx freq_buf[WINDOW_SIZE];
  for (n=0;n<WINDOW_SIZE;n++) {
    time_buf[n].r = in[n];
    time_buf[n].i = 0;
  }
  opus_fft(&kfft, time_buf, freq_buf, 0);
  /* Only the first FREQ_SIZE bins are handed back to the caller. */
  for (n=0;n<FREQ_SIZE;n++)
    out[n] = freq_buf[n];
}
|
||||
|
||||
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
|
||||
int i;
|
||||
kiss_fft_cpx x[WINDOW_SIZE];
|
||||
kiss_fft_cpx y[WINDOW_SIZE];
|
||||
for (i=0;i<FREQ_SIZE;i++) {
|
||||
x[i] = in[i];
|
||||
}
|
||||
for (;i<WINDOW_SIZE;i++) {
|
||||
x[i].r = x[WINDOW_SIZE - i].r;
|
||||
x[i].i = -x[WINDOW_SIZE - i].i;
|
||||
}
|
||||
opus_fft(&kfft, x, y, 0);
|
||||
/* output in reverse order for IFFT. */
|
||||
out[0] = WINDOW_SIZE*y[0].r;
|
||||
for (i=1;i<WINDOW_SIZE;i++) {
|
||||
out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
|
||||
}
|
||||
}
|
||||
|
||||
static float lpc_from_bands(float *lpc, const float *Ex)
|
||||
{
|
||||
int i;
|
||||
float e;
|
||||
float ac[LPC_ORDER+1];
|
||||
float rc[LPC_ORDER];
|
||||
float Xr[FREQ_SIZE];
|
||||
kiss_fft_cpx X_auto[FREQ_SIZE];
|
||||
float x_auto[WINDOW_SIZE];
|
||||
interp_band_gain(Xr, Ex);
|
||||
Xr[FREQ_SIZE-1] = 0;
|
||||
OPUS_CLEAR(X_auto, FREQ_SIZE);
|
||||
for (i=0;i<FREQ_SIZE;i++) X_auto[i].r = Xr[i];
|
||||
inverse_transform(x_auto, X_auto);
|
||||
for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i];
|
||||
|
||||
/* -40 dB noise floor. */
|
||||
ac[0] += ac[0]*1e-4 + 320/12/38.;
|
||||
/* Lag windowing. */
|
||||
for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i);
|
||||
e = lpcn_lpc(lpc, rc, ac, LPC_ORDER);
|
||||
return e;
|
||||
}
|
||||
|
||||
void lpc_weighting(float *lpc, float gamma)
|
||||
{
|
||||
int i;
|
||||
float gamma_i = gamma;
|
||||
for (i = 0; i < LPC_ORDER; i++)
|
||||
{
|
||||
lpc[i] *= gamma_i;
|
||||
gamma_i *= gamma;
|
||||
}
|
||||
}
|
||||
|
||||
float lpc_from_cepstrum(float *lpc, const float *cepstrum)
|
||||
{
|
||||
int i;
|
||||
float Ex[NB_BANDS];
|
||||
float tmp[NB_BANDS];
|
||||
OPUS_COPY(tmp, cepstrum, NB_BANDS);
|
||||
tmp[0] += 4;
|
||||
idct(Ex, tmp);
|
||||
for (i=0;i<NB_BANDS;i++) Ex[i] = pow(10.f, Ex[i])*compensation[i];
|
||||
return lpc_from_bands(lpc, Ex);
|
||||
}
|
||||
|
||||
void apply_window(float *x) {
|
||||
int i;
|
||||
for (i=0;i<OVERLAP_SIZE;i++) {
|
||||
x[i] *= half_window[i];
|
||||
x[WINDOW_SIZE - 1 - i] *= half_window[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/* Copyright (c) 2017-2018 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FREQ_H
|
||||
#define FREQ_H
|
||||
|
||||
#include "kiss_fft.h"
|
||||
|
||||
#define LPC_ORDER 16
|
||||
|
||||
#define PREEMPHASIS (0.85f)
|
||||
|
||||
#define FRAME_SIZE_5MS (2)
|
||||
#define OVERLAP_SIZE_5MS (2)
|
||||
#define TRAINING_OFFSET_5MS (1)
|
||||
|
||||
#define WINDOW_SIZE_5MS (FRAME_SIZE_5MS + OVERLAP_SIZE_5MS)
|
||||
|
||||
#define FRAME_SIZE (80*FRAME_SIZE_5MS)
|
||||
#define OVERLAP_SIZE (80*OVERLAP_SIZE_5MS)
|
||||
#define TRAINING_OFFSET (80*TRAINING_OFFSET_5MS)
|
||||
#define WINDOW_SIZE (FRAME_SIZE + OVERLAP_SIZE)
|
||||
#define FREQ_SIZE (WINDOW_SIZE/2 + 1)
|
||||
|
||||
#define NB_BANDS 18
|
||||
#define NB_BANDS_1 (NB_BANDS - 1)
|
||||
|
||||
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X);
|
||||
void burg_cepstral_analysis(float *ceps, const float *x);
|
||||
|
||||
void apply_window(float *x);
|
||||
void dct(float *out, const float *in);
|
||||
void forward_transform(kiss_fft_cpx *out, const float *in);
|
||||
float lpc_from_cepstrum(float *lpc, const float *cepstrum);
|
||||
void apply_window(float *x);
|
||||
void lpc_weighting(float *lpc, float gamma);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,133 @@
|
|||
/* Auto generated from checkpoint lace_v2.pth (sha1: 41eaab33c6cbdb192d14f43c9f292856cab789e9) */
|
||||
|
||||
|
||||
#ifndef LACE_DATA_H
|
||||
#define LACE_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#define LACE_PREEMPH 0.85f
|
||||
#define LACE_FRAME_SIZE 80
|
||||
#define LACE_OVERLAP_SIZE 40
|
||||
#define LACE_NUM_FEATURES 93
|
||||
#define LACE_PITCH_MAX 300
|
||||
#define LACE_PITCH_EMBEDDING_DIM 64
|
||||
#define LACE_NUMBITS_RANGE_LOW 50
|
||||
#define LACE_NUMBITS_RANGE_HIGH 650
|
||||
#define LACE_NUMBITS_EMBEDDING_DIM 8
|
||||
#define LACE_COND_DIM 128
|
||||
#define LACE_HIDDEN_FEATURE_DIM 96
|
||||
#define LACE_NUMBITS_SCALE_0 1.0983514785766602f
|
||||
#define LACE_NUMBITS_SCALE_1 2.0509142875671387f
|
||||
#define LACE_NUMBITS_SCALE_2 3.5729939937591553f
|
||||
#define LACE_NUMBITS_SCALE_3 4.478035926818848f
|
||||
#define LACE_NUMBITS_SCALE_4 5.926519393920898f
|
||||
#define LACE_NUMBITS_SCALE_5 7.152282238006592f
|
||||
#define LACE_NUMBITS_SCALE_6 8.277412414550781f
|
||||
#define LACE_NUMBITS_SCALE_7 8.926830291748047f
|
||||
|
||||
#define LACE_PITCH_EMBEDDING_OUT_SIZE 64
|
||||
|
||||
#define LACE_FNET_CONV1_OUT_SIZE 96
|
||||
|
||||
#define LACE_FNET_CONV1_IN_SIZE 173
|
||||
|
||||
#define LACE_FNET_CONV1_STATE_SIZE (173 * (0))
|
||||
|
||||
#define LACE_FNET_CONV1_DELAY 0
|
||||
|
||||
#define LACE_FNET_CONV2_OUT_SIZE 128
|
||||
|
||||
#define LACE_FNET_CONV2_IN_SIZE 384
|
||||
|
||||
#define LACE_FNET_CONV2_STATE_SIZE (384 * (1))
|
||||
|
||||
#define LACE_FNET_CONV2_DELAY 0
|
||||
|
||||
#define LACE_FNET_TCONV_KERNEL_SIZE 4
|
||||
|
||||
#define LACE_FNET_TCONV_STRIDE 4
|
||||
|
||||
#define LACE_FNET_TCONV_IN_CHANNELS 128
|
||||
|
||||
#define LACE_FNET_TCONV_OUT_CHANNELS 128
|
||||
|
||||
#define LACE_FNET_GRU_OUT_SIZE 128
|
||||
|
||||
#define LACE_FNET_GRU_STATE_SIZE 128
|
||||
|
||||
#define LACE_CF1_FILTER_GAIN_A 0.690776f
|
||||
#define LACE_CF1_FILTER_GAIN_B 0.000000f
|
||||
#define LACE_CF1_LOG_GAIN_LIMIT 1.151293f
|
||||
#define LACE_CF1_KERNEL_SIZE 16
|
||||
#define LACE_CF1_LEFT_PADDING 8
|
||||
#define LACE_CF1_FRAME_SIZE 80
|
||||
#define LACE_CF1_OVERLAP_SIZE 40
|
||||
#define LACE_CF1_IN_CHANNELS 1
|
||||
#define LACE_CF1_OUT_CHANNELS 1
|
||||
#define LACE_CF1_NORM_P 2
|
||||
#define LACE_CF1_FEATURE_DIM 128
|
||||
#define LACE_CF1_MAX_LAG 301
|
||||
|
||||
#define LACE_CF1_KERNEL_OUT_SIZE 16
|
||||
|
||||
#define LACE_CF1_GAIN_OUT_SIZE 1
|
||||
|
||||
#define LACE_CF1_GLOBAL_GAIN_OUT_SIZE 1
|
||||
|
||||
#define LACE_CF2_FILTER_GAIN_A 0.690776f
|
||||
#define LACE_CF2_FILTER_GAIN_B 0.000000f
|
||||
#define LACE_CF2_LOG_GAIN_LIMIT 1.151293f
|
||||
#define LACE_CF2_KERNEL_SIZE 16
|
||||
#define LACE_CF2_LEFT_PADDING 8
|
||||
#define LACE_CF2_FRAME_SIZE 80
|
||||
#define LACE_CF2_OVERLAP_SIZE 40
|
||||
#define LACE_CF2_IN_CHANNELS 1
|
||||
#define LACE_CF2_OUT_CHANNELS 1
|
||||
#define LACE_CF2_NORM_P 2
|
||||
#define LACE_CF2_FEATURE_DIM 128
|
||||
#define LACE_CF2_MAX_LAG 301
|
||||
|
||||
#define LACE_CF2_KERNEL_OUT_SIZE 16
|
||||
|
||||
#define LACE_CF2_GAIN_OUT_SIZE 1
|
||||
|
||||
#define LACE_CF2_GLOBAL_GAIN_OUT_SIZE 1
|
||||
|
||||
#define LACE_AF1_FILTER_GAIN_A 1.381551f
|
||||
#define LACE_AF1_FILTER_GAIN_B 0.000000f
|
||||
#define LACE_AF1_SHAPE_GAIN 1.000000f
|
||||
#define LACE_AF1_KERNEL_SIZE 16
|
||||
#define LACE_AF1_FRAME_SIZE 80
|
||||
#define LACE_AF1_LEFT_PADDING 15
|
||||
#define LACE_AF1_OVERLAP_SIZE 40
|
||||
#define LACE_AF1_IN_CHANNELS 1
|
||||
#define LACE_AF1_OUT_CHANNELS 1
|
||||
#define LACE_AF1_NORM_P 2
|
||||
#define LACE_AF1_FEATURE_DIM 128
|
||||
|
||||
#define LACE_AF1_KERNEL_OUT_SIZE 16
|
||||
|
||||
#define LACE_AF1_GAIN_OUT_SIZE 1
|
||||
|
||||
/* LACE model description: one LinearLayer per network layer. Field names
   follow the LACE_* size macros defined in this header.
   NOTE(review): presumably filled in by init_lacelayers() from a
   WeightArray list -- confirm against the generated data file. */
typedef struct {
|
||||
LinearLayer lace_pitch_embedding;
|
||||
/* Feature network (LACE_FNET_*): two convolutions, a transposed
   convolution and a GRU with separate input/recurrent weights. */
LinearLayer lace_fnet_conv1;
|
||||
LinearLayer lace_fnet_conv2;
|
||||
LinearLayer lace_fnet_tconv;
|
||||
LinearLayer lace_fnet_gru_input;
|
||||
LinearLayer lace_fnet_gru_recurrent;
|
||||
/* cf1 / cf2 stages (LACE_CF1_*, LACE_CF2_*): kernel, gain and global
   gain layers each. */
LinearLayer lace_cf1_kernel;
|
||||
LinearLayer lace_cf1_gain;
|
||||
LinearLayer lace_cf1_global_gain;
|
||||
LinearLayer lace_cf2_kernel;
|
||||
LinearLayer lace_cf2_gain;
|
||||
LinearLayer lace_cf2_global_gain;
|
||||
/* af1 stage (LACE_AF1_*): kernel and gain layers. */
LinearLayer lace_af1_kernel;
|
||||
LinearLayer lace_af1_gain;
|
||||
} LACELayers;
|
||||
|
||||
int init_lacelayers(LACELayers *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* LACE_DATA_H */
|
||||
|
|
@ -0,0 +1,196 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This packet loss simulator can be used independently of the Opus codebase.
|
||||
To do that, you need to compile the following files:
|
||||
dnn/lossgen.c
|
||||
dnn/lossgen_data.c
|
||||
|
||||
with the following files needed as #include
|
||||
dnn/lossgen_data.h
|
||||
dnn/lossgen.h
|
||||
dnn/nnet_arch.h
|
||||
dnn/nnet.h
|
||||
dnn/parse_lpcnet_weights.c (included despite being a C file)
|
||||
dnn/vec_avx.h
|
||||
dnn/vec.h
|
||||
celt/os_support.h
|
||||
celt/arch.h
|
||||
celt/x86/x86_arch_macros.h
|
||||
include/opus_defines.h
|
||||
include/opus_types.h
|
||||
|
||||
Additionally, the code in dnn/lossgen_demo.c can be used to generate losses from
|
||||
the command line.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "arch.h"
|
||||
|
||||
#include <math.h>
|
||||
#include "lossgen.h"
|
||||
#include "os_support.h"
|
||||
#include "nnet.h"
|
||||
#include "assert.h"
|
||||
|
||||
/* Disable RTCD for this. */
|
||||
#define RTCD_ARCH c
|
||||
|
||||
/* Override assert to avoid undefined/redefined symbols. */
|
||||
#undef celt_assert
|
||||
#define celt_assert assert
|
||||
|
||||
/* Directly include the C files we need since the symbols won't be exposed if we link in a shared object. */
|
||||
#include "parse_lpcnet_weights.c"
|
||||
#include "nnet_arch.h"
|
||||
|
||||
#undef compute_linear
|
||||
#undef compute_activation
|
||||
|
||||
/* Force the C version since the SIMD versions may be hidden. */
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
|
||||
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(LOSSGEN_GRU1_STATE_SIZE, LOSSGEN_GRU2_STATE_SIZE)
|
||||
|
||||
/* These two functions are copied from nnet.c to make sure we don't have linking issues. */
|
||||
void compute_generic_gru_lossgen(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
|
||||
{
|
||||
int i;
|
||||
int N;
|
||||
float zrh[3*MAX_RNN_NEURONS_ALL];
|
||||
float recur[3*MAX_RNN_NEURONS_ALL];
|
||||
float *z;
|
||||
float *r;
|
||||
float *h;
|
||||
celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
|
||||
celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
|
||||
N = recurrent_weights->nb_inputs;
|
||||
z = zrh;
|
||||
r = &zrh[N];
|
||||
h = &zrh[2*N];
|
||||
celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
|
||||
celt_assert(in != state);
|
||||
compute_linear(input_weights, zrh, in, arch);
|
||||
compute_linear(recurrent_weights, recur, state, arch);
|
||||
for (i=0;i<2*N;i++)
|
||||
zrh[i] += recur[i];
|
||||
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
|
||||
for (i=0;i<N;i++)
|
||||
h[i] += recur[2*N+i]*r[i];
|
||||
compute_activation(h, h, N, ACTIVATION_TANH, arch);
|
||||
for (i=0;i<N;i++)
|
||||
h[i] = z[i]*state[i] + (1-z[i])*h[i];
|
||||
for (i=0;i<N;i++)
|
||||
state[i] = h[i];
|
||||
}
|
||||
|
||||
|
||||
void compute_generic_dense_lossgen(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
|
||||
{
|
||||
compute_linear(layer, output, input, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation, arch);
|
||||
}
|
||||
|
||||
|
||||
static int sample_loss_impl(
|
||||
LossGenState *st,
|
||||
float percent_loss)
|
||||
{
|
||||
float input[2];
|
||||
float tmp[LOSSGEN_DENSE_IN_OUT_SIZE];
|
||||
float out;
|
||||
int loss;
|
||||
LossGen *model = &st->model;
|
||||
input[0] = st->last_loss;
|
||||
input[1] = percent_loss;
|
||||
compute_generic_dense_lossgen(&model->lossgen_dense_in, tmp, input, ACTIVATION_TANH, 0);
|
||||
compute_generic_gru_lossgen(&model->lossgen_gru1_input, &model->lossgen_gru1_recurrent, st->gru1_state, tmp, 0);
|
||||
compute_generic_gru_lossgen(&model->lossgen_gru2_input, &model->lossgen_gru2_recurrent, st->gru2_state, st->gru1_state, 0);
|
||||
compute_generic_dense_lossgen(&model->lossgen_dense_out, &out, st->gru2_state, ACTIVATION_SIGMOID, 0);
|
||||
loss = (float)rand()/RAND_MAX < out;
|
||||
st->last_loss = loss;
|
||||
return loss;
|
||||
}
|
||||
|
||||
int sample_loss(
|
||||
LossGenState *st,
|
||||
float percent_loss)
|
||||
{
|
||||
/* Due to GRU being initialized with zeros, the first packets aren't quite random,
|
||||
so we skip them. */
|
||||
if (!st->used) {
|
||||
int i;
|
||||
for (i=0;i<100;i++) sample_loss_impl(st, percent_loss);
|
||||
st->used = 1;
|
||||
}
|
||||
return sample_loss_impl(st, percent_loss);
|
||||
}
|
||||
|
||||
/* Zeroes the whole state and, unless USE_WEIGHTS_FILE is defined, loads the
   built-in weights (lossgen_arrays) into st->model. With USE_WEIGHTS_FILE
   the model is left cleared. */
void lossgen_init(LossGenState *st)
{
  int status = 0;
  OPUS_CLEAR(st, 1);
#ifndef USE_WEIGHTS_FILE
  status = init_lossgen(&st->model, lossgen_arrays);
#endif
  celt_assert(status == 0);
  /* Keeps `status` referenced when the assertion compiles to nothing. */
  (void)status;
}
|
||||
|
||||
/* Loads model weights from an in-memory weights blob.
   data/len: the blob handed to parse_weights().
   Returns 0 on success, -1 if the blob cannot be parsed or if
   init_lossgen() rejects the parsed arrays. */
int lossgen_load_model(LossGenState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  /* Do not pass the list to init_lossgen() if parsing failed.
     NOTE(review): assumes parse_weights() returns a negative value and
     leaves *list NULL (nothing for us to free) on failure -- confirm
     against parse_lpcnet_weights.c. */
  ret = parse_weights(&list, data, len);
  if (ret < 0 || list == NULL) return -1;
  ret = init_lossgen(&st->model, list);
  opus_free(list);
  if (ret == 0) return 0;
  else return -1;
}
|
||||
|
||||
#if 0
|
||||
#include <stdio.h>
|
||||
int main(int argc, char **argv) {
|
||||
int i, N;
|
||||
float p;
|
||||
LossGenState st;
|
||||
if (argc!=3) {
|
||||
fprintf(stderr, "usage: lossgen <percentage> <length>\n");
|
||||
return 1;
|
||||
}
|
||||
lossgen_init(&st);
|
||||
p = atof(argv[1]);
|
||||
N = atoi(argv[2]);
|
||||
for (i=0;i<N;i++) {
|
||||
printf("%d\n", sample_loss(&st, p));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef LOSSGEN_H
|
||||
#define LOSSGEN_H
|
||||
|
||||
|
||||
#include "lossgen_data.h"
|
||||
|
||||
#define PITCH_MIN_PERIOD 32
|
||||
#define PITCH_MAX_PERIOD 256
|
||||
|
||||
#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
|
||||
|
||||
|
||||
/* State of the packet-loss simulator. lossgen_init() zeroes the whole
   structure before loading the model. */
typedef struct {
|
||||
/* Network layers (see LossGen in the LOSSGEN_DATA_H header). */
LossGen model;
|
||||
/* Recurrent state of the two GRUs, updated on every sample. */
float gru1_state[LOSSGEN_GRU1_STATE_SIZE];
|
||||
float gru2_state[LOSSGEN_GRU2_STATE_SIZE];
|
||||
/* Previous loss decision (0 or 1); fed back as a network input. */
int last_loss;
|
||||
/* Set to 1 by sample_loss() once the initial warm-up samples have been
   discarded. */
int used;
|
||||
} LossGenState;
|
||||
|
||||
|
||||
void lossgen_init(LossGenState *st);
|
||||
int lossgen_load_model(LossGenState *st, const void *data, int len);
|
||||
|
||||
int sample_loss(
|
||||
LossGenState *st,
|
||||
float percent_loss);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,39 @@
|
|||
/* Auto generated from checkpoint lossgen2_2000.pth */
|
||||
|
||||
|
||||
#ifndef LOSSGEN_DATA_H
|
||||
#define LOSSGEN_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#define LOSSGEN_DENSE_IN_OUT_SIZE 8
|
||||
|
||||
#define LOSSGEN_DENSE_OUT_OUT_SIZE 1
|
||||
|
||||
#define LOSSGEN_GRU1_OUT_SIZE 16
|
||||
|
||||
#define LOSSGEN_GRU1_STATE_SIZE 16
|
||||
|
||||
#define LOSSGEN_GRU2_OUT_SIZE 32
|
||||
|
||||
#define LOSSGEN_GRU2_STATE_SIZE 32
|
||||
|
||||
|
||||
#define LOSSGEN_MAX_RNN_UNITS 32
|
||||
|
||||
|
||||
/* Loss-generator model description: input and output dense layers plus two
   GRUs, each with separate input and recurrent weight layers.
   NOTE(review): presumably filled in by init_lossgen() from a WeightArray
   list -- confirm against the generated data file. */
typedef struct {
|
||||
LinearLayer lossgen_dense_in;
|
||||
LinearLayer lossgen_dense_out;
|
||||
LinearLayer lossgen_gru1_input;
|
||||
LinearLayer lossgen_gru1_recurrent;
|
||||
LinearLayer lossgen_gru2_input;
|
||||
LinearLayer lossgen_gru2_recurrent;
|
||||
} LossGen;
|
||||
|
||||
int init_lossgen(LossGen *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* LOSSGEN_DATA_H */
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "lossgen.h"
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
LossGenState st;
|
||||
long num_packets;
|
||||
long i;
|
||||
float percent;
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "usage: %s <percent_loss> <nb packets>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
lossgen_init(&st);
|
||||
percent = atof(argv[1]);
|
||||
num_packets = atol(argv[2]);
|
||||
/*printf("loss: %f %d\n", percent, num_packets);*/
|
||||
for (i=0;i<num_packets;i++) {
|
||||
printf("%d\n", sample_loss(&st, percent*0.01f));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
/* Copyright (c) 2018 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef LPCNET_H_
|
||||
#define LPCNET_H_
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#define NB_FEATURES 20
|
||||
#define NB_TOTAL_FEATURES 36
|
||||
|
||||
/** Number of audio samples in a feature frame (not for encoding/decoding). */
|
||||
#define LPCNET_FRAME_SIZE (160)
|
||||
|
||||
typedef struct LPCNetState LPCNetState;
|
||||
|
||||
typedef struct LPCNetDecState LPCNetDecState;
|
||||
|
||||
typedef struct LPCNetEncState LPCNetEncState;
|
||||
|
||||
typedef struct LPCNetPLCState LPCNetPLCState;
|
||||
|
||||
|
||||
/** Gets the size of an <code>LPCNetDecState</code> structure.
|
||||
* @returns The size in bytes.
|
||||
*/
|
||||
int lpcnet_decoder_get_size(void);
|
||||
|
||||
/** Initializes a previously allocated decoder state
|
||||
* The memory pointed to by st must be at least the size returned by lpcnet_decoder_get_size().
|
||||
* This is intended for applications which use their own allocator instead of malloc.
|
||||
* @see lpcnet_decoder_create(),lpcnet_decoder_get_size()
|
||||
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_decoder_init(LPCNetDecState *st);
|
||||
|
||||
void lpcnet_reset(LPCNetState *lpcnet);
|
||||
|
||||
/** Allocates and initializes a decoder state.
|
||||
* @returns The newly created state
|
||||
*/
|
||||
LPCNetDecState *lpcnet_decoder_create(void);
|
||||
|
||||
/** Frees an <code>LPCNetDecState</code> allocated by lpcnet_decoder_create().
|
||||
* @param[in] st <tt>LPCNetDecState*</tt>: State to be freed.
|
||||
*/
|
||||
void lpcnet_decoder_destroy(LPCNetDecState *st);
|
||||
|
||||
/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
|
||||
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
|
||||
* @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
|
||||
* @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
|
||||
|
||||
|
||||
|
||||
/** Gets the size of an <code>LPCNetEncState</code> structure.
|
||||
* @returns The size in bytes.
|
||||
*/
|
||||
int lpcnet_encoder_get_size(void);
|
||||
|
||||
/** Initializes a previously allocated encoder state
|
||||
* The memory pointed to by st must be at least the size returned by lpcnet_encoder_get_size().
|
||||
* This is intended for applications which use their own allocator instead of malloc.
|
||||
* @see lpcnet_encoder_create(),lpcnet_encoder_get_size()
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_encoder_init(LPCNetEncState *st);
|
||||
|
||||
int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len);
|
||||
|
||||
/** Allocates and initializes an encoder state.
|
||||
* @returns The newly created state
|
||||
*/
|
||||
LPCNetEncState *lpcnet_encoder_create(void);
|
||||
|
||||
/** Frees an <code>LPCNetEncState</code> allocated by lpcnet_encoder_create().
|
||||
* @param[in] st <tt>LPCNetEncState*</tt>: State to be freed.
|
||||
*/
|
||||
void lpcnet_encoder_destroy(LPCNetEncState *st);
|
||||
|
||||
/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be encoded
|
||||
* @param [out] buf <tt>unsigned char *</tt>: Compressed packet
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
|
||||
|
||||
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be analyzed
|
||||
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector computed for the frame
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
|
||||
|
||||
|
||||
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>float *</tt>: Input speech to be analyzed
|
||||
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector computed for the frame
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
|
||||
|
||||
/** Gets the size of an <code>LPCNetState</code> structure.
|
||||
* @returns The size in bytes.
|
||||
*/
|
||||
int lpcnet_get_size(void);
|
||||
|
||||
/** Initializes a previously allocated synthesis state
|
||||
* The memory pointed to by st must be at least the size returned by lpcnet_get_size().
|
||||
* This is intended for applications which use their own allocator instead of malloc.
|
||||
* @see lpcnet_create(),lpcnet_get_size()
|
||||
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_init(LPCNetState *st);
|
||||
|
||||
/** Allocates and initializes a synthesis state.
|
||||
* @returns The newly created state
|
||||
*/
|
||||
LPCNetState *lpcnet_create(void);
|
||||
|
||||
/** Frees an <code>LPCNetState</code> allocated by lpcnet_create().
|
||||
* @param[in] st <tt>LPCNetState*</tt>: State to be freed.
|
||||
*/
|
||||
void lpcnet_destroy(LPCNetState *st);
|
||||
|
||||
/** Synthesizes speech from an LPCNet feature vector.
|
||||
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
|
||||
* @param [in] features <tt>const float *</tt>: LPCNet feature vector
|
||||
* @param [out] output <tt>opus_int16 *</tt>: Synthesized speech
|
||||
* @param [in] N <tt>int</tt>: Number of samples to generate
|
||||
* @retval 0 Success
|
||||
*/
|
||||
void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
|
||||
|
||||
|
||||
|
||||
int lpcnet_plc_init(LPCNetPLCState *st);
|
||||
void lpcnet_plc_reset(LPCNetPLCState *st);
|
||||
|
||||
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
|
||||
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
|
||||
|
||||
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
|
||||
|
||||
void lpcnet_plc_fec_clear(LPCNetPLCState *st);
|
||||
|
||||
int lpcnet_load_model(LPCNetState *st, const void *data, int len);
|
||||
int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,217 @@
|
|||
/* Copyright (c) 2018 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "arch.h"
|
||||
#include "lpcnet.h"
|
||||
#include "freq.h"
|
||||
#include "os_support.h"
|
||||
#include "fargan.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
# if __unix__
|
||||
# include <fcntl.h>
|
||||
# include <sys/mman.h>
|
||||
# include <unistd.h>
|
||||
# include <sys/stat.h>
|
||||
/* When available, mmap() is preferable to reading the file, as it leads to
|
||||
better resource utilization, especially if multiple processes are using the same
|
||||
file (mapping will be shared in cache). */
|
||||
/* Maps the file `filename` read-only into memory.
   On success the mapping is returned and *len receives its size in bytes.
   On any failure (cannot open, cannot stat, empty file, size that does not
   fit in an int, mmap error) NULL is returned and *len is set to 0.
   The mapping must be released with free_blob(). */
void *load_blob(const char *filename, int *len) {
  int fd;
  void *data;
  struct stat st;
  *len = 0;
  fd = open(filename, O_RDONLY);
  if (fd<0) {
    return NULL;
  }
  /* Size the mapping from the descriptor rather than from the path, so the
     size always describes the file that is actually being mapped. */
  if (fstat(fd, &st) != 0 || st.st_size <= 0
      || (off_t)(int)st.st_size != st.st_size) {
    close(fd);
    return NULL;
  }
  data = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  /* The mapping stays valid after the descriptor is closed. */
  close(fd);
  if (data == MAP_FAILED) {
    return NULL;
  }
  *len = (int)st.st_size;
  return data;
}
|
||||
/* Releases a mapping returned by load_blob(); a NULL blob is ignored. */
void free_blob(void *blob, int len) {
  if (blob == NULL) {
    return;
  }
  munmap(blob, len);
}
|
||||
# else
|
||||
/* Reads the whole file `filename` into a buffer obtained from malloc().
   On success the buffer is returned and *len receives the number of bytes
   actually read. On failure (cannot open, cannot determine the size, empty
   file, size that does not fit in an int, out of memory) NULL is returned
   and *len is set to 0. The buffer must be released with free_blob(). */
void *load_blob(const char *filename, int *len) {
  FILE *file;
  void *data;
  long size;
  size_t nread;
  *len = 0;
  /* Binary mode: the blob is raw data, and a text-mode stream may translate
     line endings on platforms that distinguish the two modes. */
  file = fopen(filename, "rb");
  if (file == NULL)
  {
    perror("could not open blob file");
    return NULL;
  }
  if (fseek(file, 0L, SEEK_END) != 0) {
    fclose(file);
    return NULL;
  }
  size = ftell(file);
  if (size <= 0 || (long)(int)size != size || fseek(file, 0L, SEEK_SET) != 0) {
    fclose(file);
    return NULL;
  }
  data = malloc((size_t)size);
  if (!data) {
    fclose(file);
    return NULL;
  }
  nread = fread(data, 1, (size_t)size, file);
  /* The stream is only needed while reading; close it on every path. */
  fclose(file);
  *len = (int)nread;
  return data;
}
|
||||
/* Releases a buffer returned by load_blob(). The length is not needed for a
   heap buffer and is accepted only to keep the same signature as the mmap()
   variant. */
void free_blob(void *blob, int len) {
  (void)len;
  free(blob);
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define MODE_FEATURES 2
|
||||
/*#define MODE_SYNTHESIS 3*/
|
||||
#define MODE_ADDLPC 5
|
||||
#define MODE_FWGAN_SYNTHESIS 6
|
||||
#define MODE_FARGAN_SYNTHESIS 7
|
||||
|
||||
/* Writes the command-line help to stderr and terminates the process with
   exit status 1. Never returns. */
void usage(void) {
  static const char *const help[] = {
    "usage: lpcnet_demo -features <input.pcm> <features.f32>\n",
    " lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n",
    " lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n",
    " plc_options:\n",
    " causal: normal (causal) PLC\n",
    " codec: normal (causal) PLC without cross-fade (will glitch)\n"
  };
  size_t line;
  for (line=0;line<sizeof(help)/sizeof(help[0]);line++) {
    fputs(help[line], stderr);
  }
  exit(1);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int mode=0;
|
||||
int arch;
|
||||
FILE *fin, *fout;
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
int len;
|
||||
void *data;
|
||||
const char *filename = "weights_blob.bin";
|
||||
#endif
|
||||
arch = opus_select_arch();
|
||||
if (argc < 4) usage();
|
||||
if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
|
||||
else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
|
||||
else if (strcmp(argv[1], "-addlpc") == 0){
|
||||
mode=MODE_ADDLPC;
|
||||
} else {
|
||||
usage();
|
||||
}
|
||||
if (argc != 4) usage();
|
||||
fin = fopen(argv[2], "rb");
|
||||
if (fin == NULL) {
|
||||
fprintf(stderr, "Can't open %s\n", argv[2]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fout = fopen(argv[3], "wb");
|
||||
if (fout == NULL) {
|
||||
fprintf(stderr, "Can't open %s\n", argv[3]);
|
||||
exit(1);
|
||||
}
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
data = load_blob(filename, &len);
|
||||
#endif
|
||||
if (mode == MODE_FEATURES) {
|
||||
LPCNetEncState *net;
|
||||
net = lpcnet_encoder_create();
|
||||
while (1) {
|
||||
float features[NB_TOTAL_FEATURES];
|
||||
opus_int16 pcm[LPCNET_FRAME_SIZE];
|
||||
size_t ret;
|
||||
ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
|
||||
if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
|
||||
lpcnet_compute_single_frame_features(net, pcm, features, arch);
|
||||
fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
|
||||
}
|
||||
lpcnet_encoder_destroy(net);
|
||||
} else if (mode == MODE_FARGAN_SYNTHESIS) {
|
||||
FARGANState fargan;
|
||||
size_t ret, i;
|
||||
float in_features[5*NB_TOTAL_FEATURES];
|
||||
float zeros[320] = {0};
|
||||
fargan_init(&fargan);
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
fargan_load_model(&fargan, data, len);
|
||||
#endif
|
||||
/* uncomment the following to align with Python code */
|
||||
/*ret = fread(&in_features[0], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);*/
|
||||
for (i=0;i<5;i++) {
|
||||
ret = fread(&in_features[i*NB_FEATURES], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
|
||||
}
|
||||
fargan_cont(&fargan, zeros, in_features);
|
||||
while (1) {
|
||||
float features[NB_FEATURES];
|
||||
float fpcm[LPCNET_FRAME_SIZE];
|
||||
opus_int16 pcm[LPCNET_FRAME_SIZE];
|
||||
ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
||||
if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
|
||||
OPUS_COPY(features, in_features, NB_FEATURES);
|
||||
fargan_synthesize(&fargan, fpcm, features);
|
||||
for (i=0;i<LPCNET_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
|
||||
fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
|
||||
}
|
||||
} else if (mode == MODE_ADDLPC) {
|
||||
float features[36];
|
||||
size_t ret;
|
||||
|
||||
while (1) {
|
||||
ret = fread(features, sizeof(features[0]), 36, fin);
|
||||
if (ret != 36 || feof(fin)) break;
|
||||
lpc_from_cepstrum(&features[20], &features[0]);
|
||||
fwrite(features, sizeof(features[0]), 36, fout);
|
||||
}
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "unknown action\n");
|
||||
}
|
||||
fclose(fin);
|
||||
fclose(fout);
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
free_blob(data, len);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,230 @@
|
|||
/* Copyright (c) 2017-2019 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "kiss_fft.h"
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include "freq.h"
|
||||
#include "pitch.h"
|
||||
#include "arch.h"
|
||||
#include <assert.h>
|
||||
#include "lpcnet_private.h"
|
||||
#include "lpcnet.h"
|
||||
#include "os_support.h"
|
||||
#include "_kiss_fft_guts.h"
|
||||
#include "celt_lpc.h"
|
||||
#include "mathops.h"
|
||||
|
||||
|
||||
int lpcnet_encoder_get_size(void) {
|
||||
return sizeof(LPCNetEncState);
|
||||
}
|
||||
|
||||
/* Initializes caller-provided encoder state: the whole structure is zeroed
   and the embedded pitch network is then set up. Always returns 0. */
int lpcnet_encoder_init(LPCNetEncState *st) {
  memset(st, 0, sizeof *st);
  /* Must come after the memset, which also wipes st->pitchdnn. */
  pitchdnn_init(&st->pitchdnn);
  return 0;
}
|
||||
|
||||
/* Loads the pitch network weights for this encoder from the `len`-byte blob
   at `data`; the result of pitchdnn_load_model() is returned unchanged. */
int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
  int status;
  status = pitchdnn_load_model(&st->pitchdnn, data, len);
  return status;
}
|
||||
|
||||
/* Allocates an encoder state with opus_alloc() and initializes it.
   Returns the new state, or NULL if the allocation failed. The state must
   be released with lpcnet_encoder_destroy(). */
LPCNetEncState *lpcnet_encoder_create(void) {
  LPCNetEncState *st;
  st = opus_alloc(lpcnet_encoder_get_size());
  /* lpcnet_encoder_init() clears *st, so it must not see a failed allocation. */
  if (st == NULL) {
    return NULL;
  }
  lpcnet_encoder_init(st);
  return st;
}
|
||||
|
||||
/* Frees an encoder state obtained from lpcnet_encoder_create(). */
void lpcnet_encoder_destroy(LPCNetEncState *st)
{
  opus_free(st);
}
|
||||
|
||||
/* Builds one analysis window from the OVERLAP_SIZE samples saved by the
   previous call followed by the FRAME_SIZE new samples in `in`, saves the
   tail of `in` for the next call, then windows and transforms the buffer.
   X receives the spectrum and Ex the band energies computed from it. */
static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
  float win[WINDOW_SIZE];
  float *const fresh = &win[OVERLAP_SIZE];
  OPUS_COPY(win, st->analysis_mem, OVERLAP_SIZE);
  OPUS_COPY(fresh, in, FRAME_SIZE);
  /* The saved overlap was consumed above, so it can be replaced now. */
  OPUS_COPY(st->analysis_mem, &in[FRAME_SIZE-OVERLAP_SIZE], OVERLAP_SIZE);
  apply_window(win);
  forward_transform(X, win);
  lpcn_compute_band_energy(Ex, X);
}
|
||||
|
||||
/* Second-order recursive filter over N samples.
   y   output samples (may be the same buffer as x)
   mem two state values, read on entry and updated on return
   x   input samples
   b   two feed-forward coefficients
   a   two feedback coefficients */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n;
  float s0 = mem[0];
  float s1 = mem[1];
  for (n=0;n<N;n++) {
    const float in = x[n];
    const float prev0 = s0;
    const float out = in + prev0;
    /* Equivalent to the direct form
         s0 = s1 + (b[0]*in - a[0]*out);
         s1 = (b[1]*in - a[1]*out);
       rearranged so the state updates do not wait on `out`, which shortens
       the dependency chain. The +1e-30f forces the ordering and has no
       effect on the output. */
    s0 = (b[0]-a[0])*in + s1 - a[0]*prev0;
    s1 = (b[1]-a[1])*in + 1e-30f - a[1]*prev0;
    y[n] = out;
  }
  mem[0] = s0;
  mem[1] = s1;
}
|
||||
|
||||
#define celt_log10(x) (0.3010299957f*celt_log2(x))
|
||||
|
||||
/* Analyses one frame of FRAME_SIZE samples from `in` and updates the state:
   st->if_features, st->xcorr_features, st->dnn_pitch, st->lpc and
   st->features (cepstrum, pitch, frame correlation and LPC coefficients),
   together with the analysis, excitation and low-pass histories.
   `arch` is forwarded to the CELT/DNN kernels. */
void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
  /* [b,a]=ellip(2, 2, 20, 1200/8000); */
  static const float lp_b[2] = {-0.84946f, 1.f};
  static const float lp_a[2] = {-1.54220f, 0.70781f};
  kiss_fft_cpx X[FREQ_SIZE];
  float Ex[NB_BANDS];
  float Ly[NB_BANDS];
  float aligned_in[FRAME_SIZE];
  float x[FRAME_SIZE+LPC_ORDER];
  float xcorr[PITCH_MAX_PERIOD];
  float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
  float *const exc = st->exc_buf;
  float *const lp = &st->lp_buf[PITCH_MAX_PERIOD];
  const float *lagged;
  float follow, logMax;
  float ener0;
  double ener1;
  float xy, xx, yy;
  float frame_corr;
  int pitch;
  int i;

  /* Take the delayed samples now: frame_analysis() replaces analysis_mem. */
  OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
  frame_analysis(st, X, Ex, in);

  /* Per-bin features for the pitch network: for every bin after the first,
     the C_MULC product of the current bin with the previous frame's bin
     scaled to unit magnitude, plus a clamped log-magnitude term. */
  st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
  for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
    kiss_fft_cpx prod;
    float norm_1;
    C_MULC(prod, X[i], st->prev_if[i]);
    norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
    C_MULBYSCALAR(prod, norm_1);
    st->if_features[3*i-2] = prod.r;
    st->if_features[3*i-1] = prod.i;
    st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
  }
  OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);

  /* Log band energies, floored at 8 below the running maximum and at 2.5
     below a follower that decays by 2.5 per band. */
  logMax = -2;
  follow = -2;
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = celt_log10(1e-2f+Ex[i]);
    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
    logMax = MAX16(logMax, Ly[i]);
    follow = MAX16(follow-2.5f, Ly[i]);
  }
  dct(st->features, Ly);
  st->features[0] -= 4;
  lpc_from_cepstrum(st->lpc, st->features);
  for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];

  /* Slide both histories by one frame and filter the aligned input with the
     new LPC coefficients into the newest part of lp_buf. */
  OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
  OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
  OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
  OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
  OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
  OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
  celt_fir(&x[LPC_ORDER], st->lpc, lp, FRAME_SIZE, LPC_ORDER, arch);
  for (i=0;i<FRAME_SIZE;i++) {
    exc[PITCH_MAX_PERIOD+i] = lp[i] + .7f*st->pitch_filt;
    st->pitch_filt = lp[i];
  }
  /* In-place low-pass of the newest frame; exc_buf above was derived from
     the unfiltered values. */
  biquad(lp, st->lp_mem, lp, lp_b, lp_a, FRAME_SIZE);

  /* Cross-correlation features, normalized by 1 + the energy of the current
     frame + the energy of the lagged segment. The lagged energy is updated
     recursively in double precision. */
  celt_pitch_xcorr(&exc[PITCH_MAX_PERIOD], exc, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
  ener0 = celt_inner_prod(&exc[PITCH_MAX_PERIOD], &exc[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
  ener1 = celt_inner_prod(&exc[0], &exc[0], FRAME_SIZE, arch);
  for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
    st->xcorr_features[i] = 2*xcorr[i];
    ener_norm[i] = 1 + ener0 + ener1;
    ener1 += exc[i+FRAME_SIZE]*(double)exc[i+FRAME_SIZE] - exc[i]*(double)exc[i];
  }
  /* Split in a separate loop so the compiler can vectorize it */
  for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
    st->xcorr_features[i] /= ener_norm[i];
  }

  /* Network pitch estimate, converted to a lag in samples so the low-passed
     signal can be correlated with itself at that lag. */
  st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
  pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
  lagged = &st->lp_buf[PITCH_MAX_PERIOD-pitch];
  xx = celt_inner_prod(lp, lp, FRAME_SIZE, arch);
  yy = celt_inner_prod(lagged, lagged, FRAME_SIZE, arch);
  xy = celt_inner_prod(lp, lagged, FRAME_SIZE, arch);
  frame_corr = xy/sqrt(1+xx*yy);
  frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
  st->features[NB_BANDS] = st->dnn_pitch;
  st->features[NB_BANDS + 1] = frame_corr-.5f;
}
|
||||
|
||||
/* First-order pre-emphasis over N samples: y[i] = x[i] - coef*x[i-1].
   *mem holds -coef times the last input of the previous call on entry and
   is updated for the next call. y may be the same buffer as x. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
  int n;
  for (n=0;n<N;n++) {
    /* Read the input first so in-place operation is safe. */
    const float sample = x[n];
    const float out = sample + *mem;
    *mem = -coef*sample;
    y[n] = out;
  }
}
|
||||
|
||||
/* Shared back end of the two public feature extractors. Pre-emphasizes the
   FRAME_SIZE samples in `x` in place, analyses them and copies the resulting
   NB_TOTAL_FEATURES values from the state into `features`.
   Always returns 0. */
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
  float *const frame = x;
  preemphasis(frame, &st->mem_preemph, frame, PREEMPHASIS, FRAME_SIZE);
  compute_frame_features(st, frame, arch);
  OPUS_COPY(features, st->features, NB_TOTAL_FEATURES);
  return 0;
}
|
||||
|
||||
/* Computes the features of one frame of 16-bit PCM. The samples are widened
   to float in a local buffer, which the shared implementation then modifies
   in place. Always returns 0. */
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
  float x[FRAME_SIZE];
  int n;
  for (n=0;n<FRAME_SIZE;n++) {
    x[n] = pcm[n];
  }
  lpcnet_compute_single_frame_features_impl(st, x, features, arch);
  return 0;
}
|
||||
|
||||
/* Computes the features of one frame of float PCM. The input is copied to a
   local buffer first because the shared implementation filters its input in
   place and `pcm` is const. Always returns 0. */
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
  float x[FRAME_SIZE];
  int n;
  for (n=0;n<FRAME_SIZE;n++) {
    x[n] = pcm[n];
  }
  lpcnet_compute_single_frame_features_impl(st, x, features, arch);
  return 0;
}
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
/* Copyright (c) 2021 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "lpcnet_private.h"
|
||||
#include "lpcnet.h"
|
||||
#include "plc_data.h"
|
||||
#include "os_support.h"
|
||||
#include "common.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.141592653
|
||||
#endif
|
||||
|
||||
/* Comment this out to have LPCNet update its state on every good packet (slow). */
|
||||
#define PLC_SKIP_UPDATES
|
||||
|
||||
/* Returns the PLC to its initial runtime state. Every field from
   LPCNET_PLC_RESET_START to the end of the structure is zeroed and the
   embedded encoder is re-initialized; fields located before the reset
   marker are left untouched by the clear. */
void lpcnet_plc_reset(LPCNetPLCState *st) {
  char *const reset_from = (char*)&st->LPCNET_PLC_RESET_START;
  const size_t reset_bytes = sizeof(LPCNetPLCState) - (reset_from - (char*)st);
  OPUS_CLEAR(reset_from, reset_bytes);
  lpcnet_encoder_init(&st->enc);
  OPUS_CLEAR(st->pcm, PLC_BUF_SIZE);
  st->loss_count = 0;
  st->blend = 0;
  st->analysis_gap = 1;
  /* Both positions start at the end of the PCM history. */
  st->predict_pos = PLC_BUF_SIZE;
  st->analysis_pos = PLC_BUF_SIZE;
}
|
||||
|
||||
/* Initializes a PLC state: selects the CPU architecture, initializes the
   FARGAN and encoder sub-states and resets the runtime fields. Unless
   USE_WEIGHTS_FILE is defined the built-in PLC model is loaded as well and
   st->loaded records whether that succeeded; with USE_WEIGHTS_FILE the
   state stays unloaded. Returns the model initialization result
   (0 on success). */
int lpcnet_plc_init(LPCNetPLCState *st) {
  int ret;
  st->arch = opus_select_arch();
  fargan_init(&st->fargan);
  lpcnet_encoder_init(&st->enc);
#ifdef USE_WEIGHTS_FILE
  ret = 0;
  st->loaded = 0;
#else
  ret = init_plcmodel(&st->model, plcmodel_arrays);
  st->loaded = (ret == 0) ? 1 : 0;
#endif
  celt_assert(ret == 0);
  lpcnet_plc_reset(st);
  return ret;
}
|
||||
|
||||
/* Loads the PLC, encoder and FARGAN models from the `len`-byte weight blob
   at `data`. Returns 0 and marks the state as loaded when all three
   succeed; otherwise returns the first non-zero status, or -1 when the blob
   yields no weight list. */
int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* NOTE(review): assumes parse_weights() leaves *list NULL when the blob
     cannot be parsed - confirm against its implementation. Starting from
     NULL at least guarantees that an untouched pointer is never used or
     freed. */
  if (list == NULL) {
    return -1;
  }
  ret = init_plcmodel(&st->model, list);
  opus_free(list);
  if (ret == 0) {
    ret = lpcnet_encoder_load_model(&st->enc, data, len);
  }
  if (ret == 0) {
    ret = fargan_load_model(&st->fargan, data, len);
  }
  if (ret == 0) st->loaded = 1;
  return ret;
}
|
||||
|
||||
/* Queues one FEC feature vector (NB_FEATURES floats) for later concealment.
   A NULL `features` records a frame for which no FEC is available by
   incrementing fec_skip. When the queue has reached PLC_MAX_FEC entries the
   unread entries are first moved to the front; if none has been read yet
   there is no room and the new vector is dropped. */
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
  if (features == NULL) {
    st->fec_skip++;
    return;
  }
  if (st->fec_fill_pos == PLC_MAX_FEC) {
    const int pending = st->fec_fill_pos-st->fec_read_pos;
    if (st->fec_read_pos == 0) {
      /* Compaction cannot free a slot, and storing anyway would write past
         the end of st->fec. Keep the queued vectors and drop the new one. */
      return;
    }
    OPUS_MOVE(&st->fec[0][0], &st->fec[st->fec_read_pos][0], pending*NB_FEATURES);
    st->fec_fill_pos = pending;
    st->fec_read_pos = 0;
  }
  OPUS_COPY(&st->fec[st->fec_fill_pos][0], features, NB_FEATURES);
  st->fec_fill_pos++;
}
|
||||
|
||||
/* Empties the FEC queue and forgets any pending skipped frames. */
void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
  st->fec_skip = 0;
  st->fec_fill_pos = 0;
  st->fec_read_pos = 0;
}
|
||||
|
||||
|
||||
/* Runs one step of the PLC prediction network: input dense layer (tanh),
   two GRU layers whose states live in st->plc_net, and a linear output
   layer. `in` is the network input and `out` receives the output; the model
   must already be loaded. */
static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
  float dense_out[PLC_DENSE_IN_OUT_SIZE];
  PLCNetState *state = &st->plc_net;
  PLCModel *weights = &st->model;
  celt_assert(st->loaded);
  compute_generic_dense(&weights->plc_dense_in, dense_out, in, ACTIVATION_TANH, st->arch);
  compute_generic_gru(&weights->plc_gru1_input, &weights->plc_gru1_recurrent, state->gru1_state, dense_out, st->arch);
  /* The second GRU is driven by the updated state of the first. */
  compute_generic_gru(&weights->plc_gru2_input, &weights->plc_gru2_recurrent, state->gru2_state, state->gru1_state, st->arch);
  compute_generic_dense(&weights->plc_dense_out, out, state->gru2_state, ACTIVATION_LINEAR, st->arch);
}
|
||||
|
||||
/* Produces the next feature vector in `out`.
   Returns 1 when it was taken from the FEC queue and 0 when it was
   predicted by the network. A queued vector is used only if one is unread
   and no skipped frame is pending; otherwise the network predicts from an
   all-zero input and one pending skip, if any, is consumed. */
static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
  float net_in[2*NB_BANDS+NB_FEATURES+1] = {0};
  float discard[NB_FEATURES];
  if (st->fec_read_pos == st->fec_fill_pos || st->fec_skip != 0) {
    compute_plc_pred(st, out, net_in);
    if (st->fec_skip > 0) st->fec_skip--;
    return 0;
  }
  OPUS_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
  st->fec_read_pos++;
  /* Update PLC state using FEC, so without Burg features. */
  OPUS_COPY(&net_in[2*NB_BANDS], out, NB_FEATURES);
  net_in[2*NB_BANDS+NB_FEATURES] = -1;
  compute_plc_pred(st, discard, net_in);
  return 1;
}
|
||||
|
||||
/* Appends one NB_FEATURES vector to st->cont_features, which keeps the last
   CONT_VECTORS vectors: the oldest is shifted out and the new one is stored
   in the final slot. */
static void queue_features(LPCNetPLCState *st, const float *features) {
  float *const newest = &st->cont_features[(CONT_VECTORS-1)*NB_FEATURES];
  OPUS_MOVE(st->cont_features, &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES);
  OPUS_COPY(newest, features, NB_FEATURES);
}
|
||||
|
||||
/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
|
||||
needs to generate two feature vectors to conceal the first lost packet.*/
|
||||
|
||||
/* Feeds one correctly received frame of FRAME_SIZE samples to the PLC.
   The PCM history is shifted by one frame and the new samples, scaled by
   1/32768, are appended. The analysis and prediction positions move back by
   one frame when possible; if the analysis position cannot move back, an
   analysis gap is flagged. Loss tracking is reset. Always returns 0. */
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) {
  float *const newest = &st->pcm[PLC_BUF_SIZE-FRAME_SIZE];
  int n;
  if (st->analysis_pos - FRAME_SIZE >= 0) {
    st->analysis_pos -= FRAME_SIZE;
  } else {
    st->analysis_gap = 1;
  }
  if (st->predict_pos - FRAME_SIZE >= 0) {
    st->predict_pos -= FRAME_SIZE;
  }
  OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
  for (n=0;n<FRAME_SIZE;n++) {
    newest[n] = (1.f/32768.f)*pcm[n];
  }
  st->blend = 0;
  st->loss_count = 0;
  return 0;
}
|
||||
|
||||
static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6};
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
|
||||
int i;
|
||||
celt_assert(st->loaded);
|
||||
if (st->blend == 0) {
|
||||
int count = 0;
|
||||
st->plc_net = st->plc_bak[0];
|
||||
while (st->analysis_pos + FRAME_SIZE <= PLC_BUF_SIZE) {
|
||||
float x[FRAME_SIZE];
|
||||
float plc_features[2*NB_BANDS+NB_FEATURES+1];
|
||||
celt_assert(st->analysis_pos >= 0);
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
|
||||
burg_cepstral_analysis(plc_features, x);
|
||||
lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
|
||||
if ((!st->analysis_gap || count>0) && st->analysis_pos >= st->predict_pos) {
|
||||
queue_features(st, st->features);
|
||||
OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
|
||||
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
|
||||
st->plc_bak[0] = st->plc_bak[1];
|
||||
st->plc_bak[1] = st->plc_net;
|
||||
compute_plc_pred(st, st->features, plc_features);
|
||||
}
|
||||
st->analysis_pos += FRAME_SIZE;
|
||||
count++;
|
||||
}
|
||||
st->plc_bak[0] = st->plc_bak[1];
|
||||
st->plc_bak[1] = st->plc_net;
|
||||
get_fec_or_pred(st, st->features);
|
||||
queue_features(st, st->features);
|
||||
st->plc_bak[0] = st->plc_bak[1];
|
||||
st->plc_bak[1] = st->plc_net;
|
||||
get_fec_or_pred(st, st->features);
|
||||
queue_features(st, st->features);
|
||||
fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features);
|
||||
st->analysis_gap = 0;
|
||||
}
|
||||
st->plc_bak[0] = st->plc_bak[1];
|
||||
st->plc_bak[1] = st->plc_net;
|
||||
if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
|
||||
else st->loss_count++;
|
||||
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
|
||||
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
|
||||
fargan_synthesize_int(&st->fargan, pcm, &st->features[0]);
|
||||
queue_features(st, st->features);
|
||||
if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE;
|
||||
else st->analysis_gap = 1;
|
||||
st->predict_pos = PLC_BUF_SIZE;
|
||||
OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
|
||||
for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i];
|
||||
st->blend = 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
#ifndef LPCNET_PRIVATE_H
|
||||
#define LPCNET_PRIVATE_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "freq.h"
|
||||
#include "lpcnet.h"
|
||||
#include "plc_data.h"
|
||||
#include "pitchdnn.h"
|
||||
#include "fargan.h"
|
||||
|
||||
|
||||
/* Frame length term of the pitch buffers; only used here through PITCH_BUF_SIZE. */
#define PITCH_FRAME_SIZE 320
|
||||
/* Length of the exc_buf[] and lp_buf[] arrays in LPCNetEncState. */
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
|
||||
|
||||
/* Number of rows in LPCNetPLCState.fec[][] (one row of NB_FEATURES per entry). */
#define PLC_MAX_FEC 100
|
||||
/* NOTE(review): not referenced anywhere else in this header -- confirm it is still used. */
#define MAX_FEATURE_BUFFER_SIZE 4
|
||||
|
||||
/* Length of LPCNetEncState.prev_if[]. */
#define PITCH_IF_MAX_FREQ 30
|
||||
/* Length of LPCNetEncState.if_features[]: 3*30 - 2 = 88 values. */
#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
|
||||
|
||||
/* Number of feature vectors held in LPCNetPLCState.cont_features[];
   also enters PLC_BUF_SIZE as (CONT_VECTORS+5) frames. */
#define CONT_VECTORS 5
|
||||
|
||||
/* NOTE(review): not referenced anywhere else in this header -- confirm it is still used. */
#define FEATURES_DELAY 1
|
||||
|
||||
/* Feature-analysis state. An instance is embedded in LPCNetPLCState as `enc`
   and handed to lpcnet_compute_single_frame_features_float() by the PLC code.
   The per-field notes below are derived from the array bounds only; the exact
   meaning of each buffer lives in the analysis code, which is not part of this
   header. */
struct LPCNetEncState{
|
||||
PitchDNNState pitchdnn;
|
||||
/* OVERLAP_SIZE samples -- presumably carried between consecutive analysis windows (confirm). */
float analysis_mem[OVERLAP_SIZE];
|
||||
float mem_preemph;
|
||||
/* One complex value per frequency bin, PITCH_IF_MAX_FREQ bins. */
kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ];
|
||||
float if_features[PITCH_IF_FEATURES];
|
||||
/* One value per candidate lag in [PITCH_MIN_PERIOD, PITCH_MAX_PERIOD). */
float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
|
||||
float dnn_pitch;
|
||||
float pitch_mem[LPC_ORDER];
|
||||
float pitch_filt;
|
||||
/* Both pitch buffers hold PITCH_MAX_PERIOD + PITCH_FRAME_SIZE samples. */
float exc_buf[PITCH_BUF_SIZE];
|
||||
float lp_buf[PITCH_BUF_SIZE];
|
||||
float lp_mem[4];
|
||||
float lpc[LPC_ORDER];
|
||||
float features[NB_TOTAL_FEATURES];
|
||||
float sig_mem[LPC_ORDER];
|
||||
/* 2*NB_BANDS values; the PLC code uses a feature layout whose first
   2*NB_BANDS entries come from burg_cepstral_analysis() -- presumably the
   same data (confirm). */
float burg_cepstrum[2*NB_BANDS];
|
||||
};
|
||||
|
||||
/* State of the PLC prediction network, held by value so it can be snapshotted:
   LPCNetPLCState keeps the live copy in `plc_net` and two older copies in
   `plc_bak[]`. The two arrays are sized by PLC_GRU1_STATE_SIZE and
   PLC_GRU2_STATE_SIZE (presumably one state vector per GRU layer -- confirm
   against plc_data.h). */
typedef struct {
|
||||
float gru1_state[PLC_GRU1_STATE_SIZE];
|
||||
float gru2_state[PLC_GRU2_STATE_SIZE];
|
||||
} PLCNetState;
|
||||
|
||||
/* Length of LPCNetPLCState.pcm[] in samples: (CONT_VECTORS+5) frames of FRAME_SIZE. */
#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE)
|
||||
/* Packet-loss-concealment state.
   Field order matters: LPCNET_PLC_RESET_START names the first member of the
   trailing region (presumably everything from `fec` to the end is what a reset
   clears -- confirm against the reset code). Do not reorder members. */
struct LPCNetPLCState {
|
||||
PLCModel model;
|
||||
/* Synthesis state passed to fargan_cont() / fargan_synthesize_int(). */
FARGANState fargan;
|
||||
/* Feature-analysis state used on the samples stored in pcm[]. */
LPCNetEncState enc;
|
||||
int loaded;
|
||||
int arch;
|
||||
|
||||
#define LPCNET_PLC_RESET_START fec
|
||||
float fec[PLC_MAX_FEC][NB_FEATURES];
|
||||
/* Set to 1 by the concealment code when analysis_pos cannot be moved back
   by one frame; cleared once the catch-up analysis has run. */
int analysis_gap;
|
||||
int fec_read_pos;
|
||||
int fec_fill_pos;
|
||||
int fec_skip;
|
||||
/* Index into pcm[] of the next frame to analyse; advanced by FRAME_SIZE per
   analysed frame and asserted to be >= 0. */
int analysis_pos;
|
||||
/* Analysed frames are only queued/predicted once analysis_pos >= predict_pos;
   set to PLC_BUF_SIZE after a concealed frame. */
int predict_pos;
|
||||
/* Sample history. Concealment shifts it left by FRAME_SIZE and appends the
   new frame scaled by 1/32768; analysis reads it back scaled by 32768. */
float pcm[PLC_BUF_SIZE];
|
||||
/* Set to 1 at the end of a concealed frame. */
int blend;
|
||||
float features[NB_TOTAL_FEATURES];
|
||||
/* CONT_VECTORS feature vectors handed to fargan_cont(). */
float cont_features[CONT_VECTORS*NB_FEATURES];
|
||||
/* Reset to 0 when get_fec_or_pred() returns non-zero, incremented otherwise;
   selects the attenuation applied to features[0]. */
int loss_count;
|
||||
/* Live predictor state, plus a two-deep history: before each prediction
   plc_bak[0] takes plc_bak[1] and plc_bak[1] takes plc_net. */
PLCNetState plc_net;
|
||||
PLCNetState plc_bak[2];
|
||||
};
|
||||
|
||||
void preemphasis(float *y, float *mem, const float *x, float coef, int N);
|
||||
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
|
||||
|
||||
void lpcnet_reset_signal(LPCNetState *lpcnet);
|
||||
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
|
||||
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
|
||||
void run_frame_network_flush(LPCNetState *lpcnet);
|
||||
|
||||
|
||||
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
|
||||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
|
||||
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
|
||||
|
||||
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,307 @@
|
|||
/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "kiss_fft.h"
|
||||
|
||||
static const arch_fft_state arch_fft = {0, NULL};
|
||||
|
||||
static const opus_int16 fft_bitrev[320] = {
|
||||
0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
|
||||
48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
|
||||
36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
|
||||
24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
|
||||
12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
|
||||
60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
|
||||
33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
|
||||
21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
|
||||
9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
|
||||
57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
|
||||
45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
|
||||
18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
|
||||
6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
|
||||
54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
|
||||
42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
|
||||
30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
|
||||
3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
|
||||
51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
|
||||
39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
|
||||
27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
|
||||
15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
|
||||
63, 127, 191, 255, 319, };
|
||||
|
||||
static const kiss_twiddle_cpx fft_twiddles[320] = {
|
||||
{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
|
||||
{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
|
||||
{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
|
||||
{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
|
||||
{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
|
||||
{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
|
||||
{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
|
||||
{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
|
||||
{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
|
||||
{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
|
||||
{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
|
||||
{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
|
||||
{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
|
||||
{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
|
||||
{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
|
||||
{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
|
||||
{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
|
||||
{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
|
||||
{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
|
||||
{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
|
||||
{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
|
||||
{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
|
||||
{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
|
||||
{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
|
||||
{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
|
||||
{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
|
||||
{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
|
||||
{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
|
||||
{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
|
||||
{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
|
||||
{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
|
||||
{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
|
||||
{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
|
||||
{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
|
||||
{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
|
||||
{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
|
||||
{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
|
||||
{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
|
||||
{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
|
||||
{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
|
||||
{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
|
||||
{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
|
||||
{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
|
||||
{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
|
||||
{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
|
||||
{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
|
||||
{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
|
||||
{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
|
||||
{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
|
||||
{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
|
||||
{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
|
||||
{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
|
||||
{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
|
||||
{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
|
||||
{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
|
||||
{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
|
||||
{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
|
||||
{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
|
||||
{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
|
||||
{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
|
||||
{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
|
||||
{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
|
||||
{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
|
||||
{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
|
||||
{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
|
||||
{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
|
||||
{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
|
||||
{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
|
||||
{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
|
||||
{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
|
||||
{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
|
||||
{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
|
||||
{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
|
||||
{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
|
||||
{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
|
||||
{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
|
||||
{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
|
||||
{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
|
||||
{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
|
||||
{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
|
||||
{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
|
||||
{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
|
||||
{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
|
||||
{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
|
||||
{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
|
||||
{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
|
||||
{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
|
||||
{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
|
||||
{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
|
||||
{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
|
||||
{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
|
||||
{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
|
||||
{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
|
||||
{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
|
||||
{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
|
||||
{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
|
||||
{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
|
||||
{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
|
||||
{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
|
||||
{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
|
||||
{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
|
||||
{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
|
||||
{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
|
||||
{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
|
||||
{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
|
||||
{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
|
||||
{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
|
||||
{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
|
||||
{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
|
||||
{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
|
||||
{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
|
||||
{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
|
||||
{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
|
||||
{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
|
||||
{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
|
||||
{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
|
||||
{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
|
||||
{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
|
||||
{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
|
||||
{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
|
||||
{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
|
||||
{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
|
||||
{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
|
||||
{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
|
||||
{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
|
||||
{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
|
||||
{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
|
||||
{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
|
||||
{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
|
||||
{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
|
||||
{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
|
||||
{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
|
||||
{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
|
||||
{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
|
||||
{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
|
||||
{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
|
||||
{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
|
||||
{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
|
||||
{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
|
||||
{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
|
||||
{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
|
||||
{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
|
||||
{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
|
||||
{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
|
||||
{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
|
||||
{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
|
||||
{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
|
||||
{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
|
||||
{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
|
||||
{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
|
||||
{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
|
||||
{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
|
||||
{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
|
||||
{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
|
||||
{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
|
||||
{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
|
||||
{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
|
||||
{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
|
||||
{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
|
||||
{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
|
||||
};
|
||||
|
||||
/* FFT configuration assembled from the generated tables in this file.
   - nfft is 320 and scale is 0.003125 = 1/320.
   - factors reads as (radix, remaining length) pairs: 5*64, 4*16, 4*4, 4*1,
     zero-padded to 16 entries (pair interpretation presumed from the values;
     confirm against kiss_fft.h).
   - bitrev and twiddles point at the 320-entry tables defined above.
   - arch_fft is declared const; the cast below removes that qualifier to
     match the field type. */
const kiss_fft_state kfft = {
|
||||
320, /* nfft */
|
||||
0.0031250000f, /* scale */
|
||||
-1, /* shift */
|
||||
{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
|
||||
fft_bitrev, /* bitrev*/
|
||||
fft_twiddles, /* twiddles*/
|
||||
(arch_fft_state *)&arch_fft, /* arch_fft*/
|
||||
};
|
||||
|
||||
const float half_window[] = {
|
||||
3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
|
||||
0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
|
||||
0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
|
||||
0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
|
||||
0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
|
||||
0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
|
||||
0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
|
||||
0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
|
||||
0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
|
||||
0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
|
||||
0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
|
||||
0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
|
||||
0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
|
||||
0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
|
||||
0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
|
||||
0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
|
||||
0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
|
||||
0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
|
||||
0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
|
||||
0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
|
||||
0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
|
||||
0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
|
||||
0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
|
||||
0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
|
||||
0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
|
||||
0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
|
||||
0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
|
||||
0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
|
||||
0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
|
||||
0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
|
||||
0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
|
||||
0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
|
||||
};
|
||||
|
||||
const float dct_table[] = {
|
||||
0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
|
||||
0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
|
||||
0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
|
||||
0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
|
||||
0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
|
||||
-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
|
||||
-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
|
||||
-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
|
||||
-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
|
||||
-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
|
||||
0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
|
||||
0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
|
||||
-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
|
||||
0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
|
||||
-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
|
||||
-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
|
||||
1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
|
||||
-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
|
||||
0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
|
||||
0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
|
||||
-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
|
||||
-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
|
||||
-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
|
||||
-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
|
||||
0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
|
||||
0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
|
||||
0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
|
||||
0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
|
||||
0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
|
||||
0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
|
||||
-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
|
||||
-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
|
||||
0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
|
||||
0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
|
||||
0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
|
||||
-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
|
||||
0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
|
||||
-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
|
||||
0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
|
||||
-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
|
||||
-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
|
||||
0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
|
||||
0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
|
||||
-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
|
||||
-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
|
||||
0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
|
||||
0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
|
||||
-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
|
||||
5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
|
||||
0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
|
||||
1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
|
||||
0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
|
||||
0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
|
||||
0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
|
||||
0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
|
||||
0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
|
||||
-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
|
||||
-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
|
||||
0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
|
||||
0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
|
||||
-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
|
||||
0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
|
||||
0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
|
||||
-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
|
||||
0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
# Optional feature source lists; each is appended to dnn_sources only when the
# matching option is enabled.
dred_sources = sources['DRED_SOURCES']
osce_sources = sources['OSCE_SOURCES']

# Base DNN sources plus whichever optional features are switched on.
dnn_sources = sources['DEEP_PLC_SOURCES']
dnn_sources += (opt_enable_dred ? dred_sources : [])
dnn_sources += (opt_enable_osce ? osce_sources : [])

# Per-instruction-set source lists. The variable names matter: they are looked
# up as 'dnn_sources_' + <intrinsics name> further down in this file.
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
dnn_sources_neon_intr = sources['DNN_SOURCES_NEON']
dnn_sources_dotprod_intr = sources['DNN_SOURCES_DOTPROD']

# Shared include path and the accumulator for the per-intrinsics libraries.
dnn_includes = [opus_includes]
dnn_static_libs = []
|
||||
|
||||
# Runtime CPU detection (RTCD) dispatch sources are only added when RTCD is
# configured, and then only for the matching host CPU family.
if opus_conf.has('OPUS_HAVE_RTCD')
  if host_cpu_family in ['x86', 'x86_64']
    dnn_sources += sources['DNN_SOURCES_X86_RTCD']
  endif
  if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm
    dnn_sources += sources['DNN_SOURCES_ARM_RTCD']
  endif
endif
|
||||
|
||||
# Build one helper static library per available instruction-set extension so
# each can be compiled with its own flags. Availability, sources and flags are
# all resolved by variable name from the intrinsics identifier.
foreach intr_name : ['sse2', 'sse4_1', 'avx2', 'neon_intr', 'dotprod_intr']
  have_intr = get_variable('have_' + intr_name)
  if have_intr
    intr_sources = get_variable('dnn_sources_' + intr_name)
    # Flags default to an empty list when no 'opus_<name>_args' variable exists.
    intr_args = get_variable('opus_' + intr_name + '_args', [])
    dnn_static_libs += static_library(
      'dnn_' + intr_name,
      intr_sources,
      c_args: intr_args,
      include_directories: dnn_includes,
      install: false,
    )
  endif
endforeach
|
||||
|
||||
# Extra C flags for the DNN library: only Windows builds get -DDLL_EXPORT.
dnn_c_args = (host_machine.system() == 'windows') ? ['-DDLL_EXPORT'] : []
|
||||
|
||||
|
||||
# dnn_lib stays an empty list unless deep PLC is enabled, in which case it is
# the 'opus-dnn' static library with every per-intrinsics library linked whole.
dnn_lib = []
if opt_enable_deep_plc
  dnn_lib = static_library(
    'opus-dnn',
    dnn_sources,
    c_args: dnn_c_args,
    include_directories: dnn_includes,
    link_whole: [dnn_static_libs],
    dependencies: libm,
    install: false,
  )
endif
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "nndsp.h"
|
||||
#include "arch.h"
|
||||
#include "nnet.h"
|
||||
#include "os_support.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/* Fallback for toolchains whose <math.h> does not provide M_PI. Note the `f`
   suffix: this fallback is a float constant, whereas a library-provided M_PI
   is typically a double (NOTE(review): results may differ in the last bits
   between the two cases). */
#ifndef M_PI
|
||||
#define M_PI 3.141592653589793f
|
||||
#endif
|
||||
|
||||
/* Flat index into a kernel laid out as [out_channel][in_channel][tap].
   The macro is not self-contained: it reads `in_channels` and `kernel_size`
   from the scope in which it is expanded. */
#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
|
||||
|
||||
/* Resets an AdaConvState by applying OPUS_CLEAR to one element of *hAdaConv.
   OPUS_CLEAR is defined outside this file (presumably a zero fill of
   count * sizeof(*ptr) bytes -- confirm in os_support.h). */
void init_adaconv_state(AdaConvState *hAdaConv)
|
||||
{
|
||||
OPUS_CLEAR(hAdaConv, 1);
|
||||
}
|
||||
|
||||
/* Resets an AdaCombState by applying OPUS_CLEAR to one element of *hAdaComb.
   OPUS_CLEAR is defined outside this file (presumably a zero fill of
   count * sizeof(*ptr) bytes -- confirm in os_support.h). */
void init_adacomb_state(AdaCombState *hAdaComb)
|
||||
{
|
||||
OPUS_CLEAR(hAdaComb, 1);
|
||||
}
|
||||
|
||||
/* Resets an AdaShapeState by applying OPUS_CLEAR to one element of *hAdaShape.
   OPUS_CLEAR is defined outside this file (presumably a zero fill of
   count * sizeof(*ptr) bytes -- confirm in os_support.h). */
void init_adashape_state(AdaShapeState *hAdaShape)
|
||||
{
|
||||
OPUS_CLEAR(hAdaShape, 1);
|
||||
}
|
||||
|
||||
void compute_overlap_window(float *window, int overlap_size)
|
||||
{
|
||||
int i_sample;
|
||||
for (i_sample=0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_NNDSP
/* Debug helper (only compiled with DEBUG_NNDSP): prints every element of vec
   to stdout, one "name[index]: value" line per element. */
void print_float_vector(const char* name, const float *vec, int length)
{
    int idx = 0;

    while (idx < length)
    {
        printf("%s[%d]: %f\n", name, idx, vec[idx]);
        idx++;
    }
}
#endif
|
||||
|
||||
/* Normalizes and scales a kernel in place, one output channel at a time.
   For each output channel the L2 norm is taken over all input channels and
   taps, and every coefficient of that channel is multiplied by
   gain[channel] / (1e-6 + norm).
   kernel is indexed through KERNEL_INDEX, i.e. as [out][in][tap]. */
static void scale_kernel(
    float *kernel,
    int in_channels,
    int out_channels,
    int kernel_size,
    float *gain
)
{
    int oc;

    for (oc = 0; oc < out_channels; oc++)
    {
        float energy = 0;
        float inv_norm;
        int ic, tap;

        /* sum of squares over every coefficient of this output channel */
        for (ic = 0; ic < in_channels; ic++)
        {
            for (tap = 0; tap < kernel_size; tap++)
            {
                energy += kernel[KERNEL_INDEX(oc, ic, tap)] * kernel[KERNEL_INDEX(oc, ic, tap)];
            }
        }
#ifdef DEBUG_NNDSP
        printf("kernel norm: %f, %f\n", energy, sqrt(energy));
#endif
        /* the small offset keeps the reciprocal finite for an all-zero kernel */
        inv_norm = 1.f / (1e-6f + sqrt(energy));

        for (ic = 0; ic < in_channels; ic++)
        {
            for (tap = 0; tap < kernel_size; tap++)
            {
                kernel[KERNEL_INDEX(oc, ic, tap)] *= inv_norm * gain[oc];
            }
        }
    }
}
|
||||
|
||||
static void transform_gains(
|
||||
float *gains,
|
||||
int num_gains,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b
|
||||
)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_gains; i++)
|
||||
{
|
||||
gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
|
||||
}
|
||||
}
|
||||
|
||||
void adaconv_process_frame(
|
||||
AdaConvState* hAdaConv,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float shape_gain,
|
||||
float *window,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
|
||||
float kernel0[ADACONV_MAX_KERNEL_SIZE];
|
||||
float kernel1[ADACONV_MAX_KERNEL_SIZE];
|
||||
float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
|
||||
float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
|
||||
float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float *p_input;
|
||||
int i_in_channels, i_out_channels, i_sample;
|
||||
|
||||
(void) feature_dim; /* ToDo: figure out whether we might need this information */
|
||||
|
||||
celt_assert(shape_gain == 1);
|
||||
celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
|
||||
celt_assert(kernel_size < frame_size);
|
||||
|
||||
OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
|
||||
OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
|
||||
OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_in", x_in, in_channels * frame_size);
|
||||
#endif
|
||||
|
||||
/* prepare input */
|
||||
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
|
||||
{
|
||||
OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
|
||||
OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
|
||||
}
|
||||
p_input = input_buffer + kernel_size;
|
||||
|
||||
|
||||
/* calculate new kernel and new gain */
|
||||
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
|
||||
compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("features", features, feature_dim);
|
||||
print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
|
||||
print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
|
||||
#endif
|
||||
transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
|
||||
scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
|
||||
print_float_vector("adaconv_gain", gain_buffer, out_channels);
|
||||
#endif
|
||||
|
||||
/* calculate overlapping part using kernel from last frame */
|
||||
|
||||
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
|
||||
{
|
||||
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
|
||||
{
|
||||
OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
|
||||
|
||||
OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
|
||||
OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
|
||||
celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
|
||||
celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
|
||||
}
|
||||
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_out", x_out, out_channels * frame_size);
|
||||
#endif
|
||||
|
||||
/* buffer update */
|
||||
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
|
||||
{
|
||||
OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
|
||||
}
|
||||
OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
|
||||
}
|
||||
|
||||
void adacomb_process_frame(
|
||||
AdaCombState* hAdaComb,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
const LinearLayer *global_gain_layer,
|
||||
int pitch_lag,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float log_gain_limit,
|
||||
float *window,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float output_buffer[ADACOMB_MAX_FRAME_SIZE];
|
||||
float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
|
||||
float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
|
||||
float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
|
||||
float gain, global_gain;
|
||||
float *p_input;
|
||||
int i_sample;
|
||||
float kernel[16];
|
||||
float last_kernel[16];
|
||||
|
||||
(void) feature_dim; /* ToDo: figure out whether we might need this information */
|
||||
|
||||
OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
|
||||
OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
|
||||
|
||||
OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
|
||||
OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
|
||||
p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
|
||||
|
||||
/* calculate new kernel and new gain */
|
||||
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
|
||||
compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
|
||||
compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("features", features, feature_dim);
|
||||
print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
|
||||
print_float_vector("adacomb_gain_raw", &gain, 1);
|
||||
print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
|
||||
#endif
|
||||
gain = exp(log_gain_limit - gain);
|
||||
global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
|
||||
scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
|
||||
print_float_vector("adacomb_gain", &gain, 1);
|
||||
#endif
|
||||
|
||||
OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_COPY(kernel, kernel_buffer, kernel_size);
|
||||
OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
|
||||
|
||||
celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
|
||||
|
||||
celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
|
||||
}
|
||||
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
|
||||
}
|
||||
|
||||
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
|
||||
}
|
||||
OPUS_COPY(x_out, output_buffer, frame_size);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_out", x_out, frame_size);
|
||||
#endif
|
||||
|
||||
/* buffer update */
|
||||
OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
|
||||
OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
|
||||
hAdaComb->last_pitch_lag = pitch_lag;
|
||||
hAdaComb->last_global_gain = global_gain;
|
||||
}
|
||||
|
||||
|
||||
void adashape_process_frame(
|
||||
AdaShapeState *hAdaShape,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *alpha1f,
|
||||
const LinearLayer *alpha1t,
|
||||
const LinearLayer *alpha2,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int avg_pool_k,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
|
||||
float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
|
||||
float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
|
||||
int i, k;
|
||||
int tenv_size;
|
||||
float mean;
|
||||
float *tenv;
|
||||
|
||||
celt_assert(frame_size % avg_pool_k == 0);
|
||||
celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
|
||||
|
||||
tenv_size = frame_size / avg_pool_k;
|
||||
tenv = in_buffer + feature_dim;
|
||||
OPUS_CLEAR(tenv, tenv_size + 1);
|
||||
|
||||
OPUS_COPY(in_buffer, features, feature_dim);
|
||||
|
||||
/* calculate temporal envelope */
|
||||
mean = 0;
|
||||
for (i = 0; i < tenv_size; i++)
|
||||
{
|
||||
for (k = 0; k < avg_pool_k; k++)
|
||||
{
|
||||
tenv[i] += fabs(x_in[i * avg_pool_k + k]);
|
||||
}
|
||||
tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
|
||||
mean += tenv[i];
|
||||
}
|
||||
mean /= tenv_size;
|
||||
for (i = 0; i < tenv_size; i++)
|
||||
{
|
||||
tenv[i] -= mean;
|
||||
}
|
||||
tenv[tenv_size] = mean;
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("tenv", tenv, tenv_size + 1);
|
||||
#endif
|
||||
|
||||
/* calculate temporal weights */
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
|
||||
#endif
|
||||
compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
|
||||
compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("alpha1_out", out_buffer, frame_size);
|
||||
#endif
|
||||
/* compute leaky ReLU by hand. ToDo: try tanh activation */
|
||||
for (i = 0; i < frame_size; i ++)
|
||||
{
|
||||
float tmp = out_buffer[i] + tmp_buffer[i];
|
||||
in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
|
||||
}
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("post_alpha1", in_buffer, frame_size);
|
||||
#endif
|
||||
compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
|
||||
|
||||
/* shape signal */
|
||||
for (i = 0; i < frame_size; i ++)
|
||||
{
|
||||
x_out[i] = exp(out_buffer[i]) * x_in[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NNDSP_H
|
||||
#define NNDSP_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "nnet.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#define ADACONV_MAX_KERNEL_SIZE 16
|
||||
#define ADACONV_MAX_INPUT_CHANNELS 2
|
||||
#define ADACONV_MAX_OUTPUT_CHANNELS 2
|
||||
#define ADACONV_MAX_FRAME_SIZE 80
|
||||
#define ADACONV_MAX_OVERLAP_SIZE 40
|
||||
|
||||
#define ADACOMB_MAX_LAG 300
|
||||
#define ADACOMB_MAX_KERNEL_SIZE 16
|
||||
#define ADACOMB_MAX_FRAME_SIZE 80
|
||||
#define ADACOMB_MAX_OVERLAP_SIZE 40
|
||||
|
||||
#define ADASHAPE_MAX_INPUT_DIM 512
|
||||
#define ADASHAPE_MAX_FRAME_SIZE 160
|
||||
|
||||
/*#define DEBUG_NNDSP*/
|
||||
#ifdef DEBUG_NNDSP
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
void print_float_vector(const char* name, const float *vec, int length);
|
||||
|
||||
typedef struct {
|
||||
float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
|
||||
float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float last_gain;
|
||||
} AdaConvState;
|
||||
|
||||
|
||||
typedef struct {
|
||||
float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
|
||||
float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
|
||||
float last_global_gain;
|
||||
int last_pitch_lag;
|
||||
} AdaCombState;
|
||||
|
||||
|
||||
typedef struct {
|
||||
float conv_alpha1f_state[ADASHAPE_MAX_INPUT_DIM];
|
||||
float conv_alpha1t_state[ADASHAPE_MAX_INPUT_DIM];
|
||||
float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
|
||||
} AdaShapeState;
|
||||
|
||||
void init_adaconv_state(AdaConvState *hAdaConv);
|
||||
|
||||
void init_adacomb_state(AdaCombState *hAdaComb);
|
||||
|
||||
void init_adashape_state(AdaShapeState *hAdaShape);
|
||||
|
||||
void compute_overlap_window(float *window, int overlap_size);
|
||||
|
||||
void adaconv_process_frame(
|
||||
AdaConvState* hAdaConv,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
int feature_dim, /* not strictly necessary */
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float shape_gain,
|
||||
float *window,
|
||||
int arch
|
||||
);
|
||||
|
||||
void adacomb_process_frame(
|
||||
AdaCombState* hAdaComb,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
const LinearLayer *global_gain_layer,
|
||||
int pitch_lag,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float log_gain_limit,
|
||||
float *window,
|
||||
int arch
|
||||
);
|
||||
|
||||
void adashape_process_frame(
|
||||
AdaShapeState *hAdaShape,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *alpha1f,
|
||||
const LinearLayer *alpha1t,
|
||||
const LinearLayer *alpha2,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int avg_pool_k,
|
||||
int arch
|
||||
);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
/* Copyright (c) 2018 Mozilla
|
||||
2008-2011 Octasic Inc.
|
||||
2012-2017 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include "opus_types.h"
|
||||
#include "arch.h"
|
||||
#include "nnet.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
#include "plc_data.h"
|
||||
#include "fargan.h"
|
||||
#include "os_support.h"
|
||||
#include "vec.h"
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce.h"
|
||||
#endif
|
||||
|
||||
#ifdef NO_OPTIMIZATIONS
|
||||
#if defined(_MSC_VER)
|
||||
#pragma message ("Compiling without any vectorization. This code will be very slow")
|
||||
#else
|
||||
#warning Compiling without any vectorization. This code will be very slow
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#define SOFTMAX_HACK
|
||||
|
||||
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
|
||||
{
|
||||
compute_linear(layer, output, input, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation, arch);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
|
||||
#else
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
|
||||
#endif
|
||||
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
|
||||
{
|
||||
int i;
|
||||
int N;
|
||||
float zrh[3*MAX_RNN_NEURONS_ALL];
|
||||
float recur[3*MAX_RNN_NEURONS_ALL];
|
||||
float *z;
|
||||
float *r;
|
||||
float *h;
|
||||
celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
|
||||
celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
|
||||
N = recurrent_weights->nb_inputs;
|
||||
z = zrh;
|
||||
r = &zrh[N];
|
||||
h = &zrh[2*N];
|
||||
celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
|
||||
celt_assert(in != state);
|
||||
compute_linear(input_weights, zrh, in, arch);
|
||||
compute_linear(recurrent_weights, recur, state, arch);
|
||||
for (i=0;i<2*N;i++)
|
||||
zrh[i] += recur[i];
|
||||
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
|
||||
for (i=0;i<N;i++)
|
||||
h[i] += recur[2*N+i]*r[i];
|
||||
compute_activation(h, h, N, ACTIVATION_TANH, arch);
|
||||
for (i=0;i<N;i++)
|
||||
h[i] = z[i]*state[i] + (1-z[i])*h[i];
|
||||
for (i=0;i<N;i++)
|
||||
state[i] = h[i];
|
||||
}
|
||||
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
|
||||
{
|
||||
int i;
|
||||
float act2[MAX_INPUTS];
|
||||
celt_assert(layer->nb_inputs == layer->nb_outputs);
|
||||
compute_linear(layer, act2, input, arch);
|
||||
compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
|
||||
if (input == output) {
|
||||
/* Give a vectorization hint to the compiler for the in-place case. */
|
||||
for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
|
||||
} else {
|
||||
for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
|
||||
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
|
||||
{
|
||||
float tmp[MAX_CONV_INPUTS_ALL];
|
||||
celt_assert(input != output);
|
||||
celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
|
||||
if (layer->nb_inputs!=input_size) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
|
||||
OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
|
||||
compute_linear(layer, output, tmp, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation, arch);
|
||||
if (layer->nb_inputs!=input_size) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
|
||||
}
|
||||
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
|
||||
{
|
||||
float tmp[MAX_CONV_INPUTS_ALL];
|
||||
int ksize = layer->nb_inputs/input_size;
|
||||
int i;
|
||||
celt_assert(input != output);
|
||||
celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
|
||||
if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
|
||||
else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
|
||||
OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
|
||||
compute_linear(layer, output, tmp, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation, arch);
|
||||
if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
|
||||
else {
|
||||
OPUS_COPY(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
|
||||
OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
/* Copyright (c) 2018 Mozilla
|
||||
Copyright (c) 2017 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NNET_H_
|
||||
#define NNET_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include "opus_types.h"
|
||||
|
||||
#define ACTIVATION_LINEAR 0
|
||||
#define ACTIVATION_SIGMOID 1
|
||||
#define ACTIVATION_TANH 2
|
||||
#define ACTIVATION_RELU 3
|
||||
#define ACTIVATION_SOFTMAX 4
|
||||
#define ACTIVATION_SWISH 5
|
||||
|
||||
#define WEIGHT_BLOB_VERSION 0
|
||||
#define WEIGHT_BLOCK_SIZE 64
|
||||
/* One named entry of a weight table (element type of the *_arrays[] tables
   and of the list filled in by parse_weights()). */
typedef struct {
  const char *name;   /* name identifying the array */
  int type;           /* presumably one of the WEIGHT_TYPE_* codes -- TODO confirm */
  int size;           /* size of `data`; unit not visible here -- TODO confirm */
  const void *data;   /* array contents */
} WeightArray;
|
||||
|
||||
#define WEIGHT_TYPE_float 0
|
||||
#define WEIGHT_TYPE_int 1
|
||||
#define WEIGHT_TYPE_qweight 2
|
||||
#define WEIGHT_TYPE_int8 3
|
||||
|
||||
/* Fixed-layout header record of a weight blob entry. With a 4-byte int the
   struct is 64 bytes long, which equals WEIGHT_BLOCK_SIZE.
   NOTE(review): presumably one such record precedes each array in a
   serialized blob read by parse_weights() -- confirm. */
typedef struct {
  char head[4];     /* 4-byte tag; expected contents not visible here */
  int version;      /* presumably compared with WEIGHT_BLOB_VERSION -- TODO confirm */
  int type;         /* presumably one of the WEIGHT_TYPE_* codes -- TODO confirm */
  int size;         /* size of the payload; unit not visible here */
  int block_size;   /* presumably the padded payload size -- TODO confirm */
  char name[44];    /* name of the array */
} WeightHead;
|
||||
|
||||
/* Generic sparse affine transformation. */
|
||||
typedef struct {
|
||||
const float *bias;
|
||||
const float *subias;
|
||||
const opus_int8 *weights;
|
||||
const float *float_weights;
|
||||
const int *weights_idx;
|
||||
const float *diag;
|
||||
const float *scale;
|
||||
int nb_inputs;
|
||||
int nb_outputs;
|
||||
} LinearLayer;
|
||||
|
||||
/* Parameters of a 2-D convolution layer with float weights (see
   conv2d_init() and compute_conv2d_c()). */
typedef struct {
  const float *bias;            /* bias values */
  const float *float_weights;   /* convolution weights */
  int in_channels;              /* number of input channels */
  int out_channels;             /* number of output channels */
  int ktime;                    /* kernel extent along the time axis */
  int kheight;                  /* kernel extent along the height axis */
} Conv2dLayer;
|
||||
|
||||
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
|
||||
void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
|
||||
|
||||
|
||||
int parse_weights(WeightArray **list, const void *data, int len);
|
||||
|
||||
|
||||
extern const WeightArray lpcnet_arrays[];
|
||||
extern const WeightArray plcmodel_arrays[];
|
||||
extern const WeightArray rdovaeenc_arrays[];
|
||||
extern const WeightArray rdovaedec_arrays[];
|
||||
extern const WeightArray fwgan_arrays[];
|
||||
extern const WeightArray fargan_arrays[];
|
||||
extern const WeightArray pitchdnn_arrays[];
|
||||
extern const WeightArray lossgen_arrays[];
|
||||
|
||||
int linear_init(LinearLayer *layer, const WeightArray *arrays,
|
||||
const char *bias,
|
||||
const char *subias,
|
||||
const char *weights,
|
||||
const char *float_weights,
|
||||
const char *weights_idx,
|
||||
const char *diag,
|
||||
const char *scale,
|
||||
int nb_inputs,
|
||||
int nb_outputs);
|
||||
|
||||
int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
|
||||
const char *bias,
|
||||
const char *float_weights,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int ktime,
|
||||
int kheight);
|
||||
|
||||
|
||||
void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_activation_c(float *output, const float *input, int N, int activation);
|
||||
void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
|
||||
|
||||
#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
#include "arm/dnn_arm.h"
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2)
|
||||
#include "x86/dnn_x86.h"
|
||||
#endif
|
||||
|
||||
#ifndef OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
|
||||
#endif
|
||||
|
||||
#ifndef OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
|
||||
#endif
|
||||
|
||||
#ifndef OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
#if defined(_MSC_VER)
|
||||
#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
|
||||
#else
|
||||
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* NNET_H_ */
|
||||
|
|
@ -0,0 +1,247 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NNET_ARCH_H
|
||||
#define NNET_ARCH_H
|
||||
|
||||
#include "nnet.h"
|
||||
#include "arch.h"
|
||||
#include "os_support.h"
|
||||
#include "vec.h"
|
||||
|
||||
#define CAT_SUFFIX2(a,b) a ## b
|
||||
#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
|
||||
|
||||
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
|
||||
|
||||
/* Force vectorization on for DNN code because some of the loops rely on
|
||||
compiler vectorization rather than explicitly using intrinsics. */
|
||||
#if OPUS_GNUC_PREREQ(5,1)
|
||||
#define GCC_POP_OPTIONS
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize("tree-vectorize")
|
||||
#endif
|
||||
|
||||
|
||||
#define MAX_ACTIVATIONS (4096)
|
||||
|
||||
static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
float tmp[MAX_ACTIVATIONS];
|
||||
celt_assert(N <= MAX_ACTIVATIONS);
|
||||
vec_sigmoid(tmp, x, N);
|
||||
for (i=0;i<N;i++)
|
||||
y[i] = x[i]*tmp[i];
|
||||
}
|
||||
|
||||
/* Rectified linear unit: returns 0 for negative x and x itself otherwise. */
static OPUS_INLINE float relu(float x)
{
   if (x < 0) {
      return 0;
   }
   return x;
}
|
||||
|
||||
/*#define HIGH_ACCURACY */
|
||||
|
||||
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
|
||||
{
|
||||
int i;
|
||||
if (activation == ACTIVATION_SIGMOID) {
|
||||
#ifdef HIGH_ACCURACY
|
||||
for (int n=0; n<N; n++)
|
||||
{
|
||||
output[n] = 1.f / (1 + exp(-input[n]));
|
||||
}
|
||||
#else
|
||||
vec_sigmoid(output, input, N);
|
||||
#endif
|
||||
} else if (activation == ACTIVATION_TANH) {
|
||||
#ifdef HIGH_ACCURACY
|
||||
for (int n=0; n<N; n++)
|
||||
{
|
||||
output[n] = tanh(input[n]);
|
||||
}
|
||||
#else
|
||||
vec_tanh(output, input, N);
|
||||
#endif
|
||||
} else if (activation == ACTIVATION_SWISH) {
|
||||
vec_swish(output, input, N);
|
||||
} else if (activation == ACTIVATION_RELU) {
|
||||
for (i=0;i<N;i++)
|
||||
output[i] = relu(input[i]);
|
||||
} else if (activation == ACTIVATION_SOFTMAX) {
|
||||
#ifdef SOFTMAX_HACK
|
||||
OPUS_COPY(output, input, N);
|
||||
/*for (i=0;i<N;i++)
|
||||
output[i] = input[i];*/
|
||||
#else
|
||||
float sum = 0;
|
||||
softmax(output, input, N);
|
||||
for (i=0;i<N;i++) {
|
||||
sum += output[i];
|
||||
}
|
||||
sum = 1.f/(sum+1e-30);
|
||||
for (i=0;i<N;i++)
|
||||
output[i] = sum*output[i];
|
||||
#endif
|
||||
} else {
|
||||
celt_assert(activation == ACTIVATION_LINEAR);
|
||||
if (input != output) {
|
||||
for (i=0;i<N;i++)
|
||||
output[i] = input[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
|
||||
{
|
||||
int i, M, N;
|
||||
const float *bias;
|
||||
celt_assert(in != out);
|
||||
bias = linear->bias;
|
||||
M = linear->nb_inputs;
|
||||
N = linear->nb_outputs;
|
||||
if (linear->float_weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, N, in);
|
||||
else sgemv(out, linear->float_weights, N, M, N, in);
|
||||
} else if (linear->weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, N, M, in);
|
||||
else cgemv8x4(out, linear->weights, linear->scale, N, M, in);
|
||||
/* Only use SU biases on for integer matrices on SU archs. */
|
||||
#ifdef USE_SU_BIAS
|
||||
bias = linear->subias;
|
||||
#endif
|
||||
}
|
||||
else OPUS_CLEAR(out, N);
|
||||
if (bias != NULL) {
|
||||
for (i=0;i<N;i++) out[i] += bias[i];
|
||||
}
|
||||
if (linear->diag) {
|
||||
/* Diag is only used for GRU recurrent weights. */
|
||||
celt_assert(3*M == N);
|
||||
for (i=0;i<M;i++) {
|
||||
out[i] += linear->diag[i]*in[i];
|
||||
out[i+M] += linear->diag[i+M]*in[i];
|
||||
out[i+2*M] += linear->diag[i+2*M]*in[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Non-padded 2-D convolution producing a single output frame.
     in      : [ ktime x in_channels x (height+kheight-1) ]
     weights : [ out_channels x in_channels x ktime x kheight ]
     out     : one row of `height` values per output channel, row i starting
               at out[i*hstride].
   All ktime input frames are consumed at once, so the output has extent 1
   along the time axis, i.e. one frame is processed per call. */
static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
{
   int oc;
   const int in_stride = height+kheight-1;
   for (oc=0;oc<out_channels;oc++) {
      int ic;
      const int out_base = oc*hstride;
      OPUS_CLEAR(&out[out_base], height);
      for (ic=0;ic<in_channels;ic++) {
         int t;
         for (t=0;t<ktime;t++) {
            int h;
            for (h=0;h<kheight;h++) {
               int j;
               /* Kernel tap (oc, ic, t, h) and the input row it slides over. */
               const int w_idx = ((oc*in_channels + ic)*ktime + t)*kheight + h;
               const int in_base = (t*in_channels + ic)*in_stride + h;
               for (j=0;j<height;j++) {
                  out[out_base + j] += weights[w_idx] * in[in_base + j];
               }
            }
         }
      }
   }
}
|
||||
|
||||
/* Specialisation of conv2d_float() for a 3x3 kernel (ktime == kheight == 3),
   with the two kernel loops unrolled into a single nine-term sum.
   No intrinsics are used: the loop is written so that the gcc (and hopefully
   other compilers') auto-vectorizer can produce the right code by itself
   based on the compile flags. */
static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
{
   int oc;
   /* Row length of one input channel: height + kheight - 1 with kheight == 3. */
   const int in_stride = height+2;
   /* Distance between two consecutive time frames of the input. */
   const int frame_stride = in_channels*in_stride;
   for (oc=0;oc<out_channels;oc++) {
      int ic;
      const int out_base = oc*hstride;
      OPUS_CLEAR(&out[out_base], height);
      for (ic=0;ic<in_channels;ic++) {
         int j;
         /* First of the nine taps for (oc, ic), stored time-major. */
         const int w = (oc*in_channels + ic)*9;
         /* Start of this channel's row in each of the three time frames. */
         const int r0 = ic*in_stride;
         const int r1 = r0 + frame_stride;
         const int r2 = r1 + frame_stride;
         for (j=0;j<height;j++) {
            out[out_base + j] += weights[w + 0]*in[r0 + j + 0]
                               + weights[w + 1]*in[r0 + j + 1]
                               + weights[w + 2]*in[r0 + j + 2]
                               + weights[w + 3]*in[r1 + j + 0]
                               + weights[w + 4]*in[r1 + j + 1]
                               + weights[w + 5]*in[r1 + j + 2]
                               + weights[w + 6]*in[r2 + j + 0]
                               + weights[w + 7]*in[r2 + j + 1]
                               + weights[w + 8]*in[r2 + j + 2];
         }
      }
   }
}
|
||||
|
||||
#define MAX_CONV2D_INPUTS 8192
|
||||
|
||||
void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
|
||||
{
|
||||
int i;
|
||||
const float *bias;
|
||||
float in_buf[MAX_CONV2D_INPUTS];
|
||||
int time_stride;
|
||||
celt_assert(in != out);
|
||||
time_stride = conv->in_channels*(height+conv->kheight-1);
|
||||
celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
|
||||
OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
|
||||
OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
|
||||
OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
|
||||
bias = conv->bias;
|
||||
if (conv->kheight == 3 && conv->ktime == 3)
|
||||
conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
|
||||
else
|
||||
conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
|
||||
if (bias != NULL) {
|
||||
for (i=0;i<conv->out_channels;i++) {
|
||||
int j;
|
||||
for (j=0;j<height;j++) out[i*hstride+j] += bias[i];
|
||||
}
|
||||
}
|
||||
for (i=0;i<conv->out_channels;i++) {
|
||||
RTCD_SUF(compute_activation_)(&out[i*hstride], &out[i*hstride], height, activation);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GCC_POP_OPTIONS
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#define RTCD_ARCH c
|
||||
|
||||
#include "nnet_arch.h"
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,331 @@
|
|||
/* Auto generated from checkpoint nolace_small.pth (sha1: 953bf5854e1a33e8892da48a29b19aff3a272902) */
|
||||
|
||||
|
||||
#ifndef NOLACE_DATA_H
|
||||
#define NOLACE_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#define NOLACE_PREEMPH 0.85f
|
||||
#define NOLACE_FRAME_SIZE 80
|
||||
#define NOLACE_OVERLAP_SIZE 40
|
||||
#define NOLACE_NUM_FEATURES 93
|
||||
#define NOLACE_PITCH_MAX 300
|
||||
#define NOLACE_PITCH_EMBEDDING_DIM 64
|
||||
#define NOLACE_NUMBITS_RANGE_LOW 50
|
||||
#define NOLACE_NUMBITS_RANGE_HIGH 650
|
||||
#define NOLACE_NUMBITS_EMBEDDING_DIM 8
|
||||
#define NOLACE_COND_DIM 160
|
||||
#define NOLACE_HIDDEN_FEATURE_DIM 96
|
||||
#define NOLACE_NUMBITS_SCALE_0 1.0357311964035034f
|
||||
#define NOLACE_NUMBITS_SCALE_1 1.735559105873108f
|
||||
#define NOLACE_NUMBITS_SCALE_2 3.6004557609558105f
|
||||
#define NOLACE_NUMBITS_SCALE_3 4.552478313446045f
|
||||
#define NOLACE_NUMBITS_SCALE_4 5.932559490203857f
|
||||
#define NOLACE_NUMBITS_SCALE_5 7.176970481872559f
|
||||
#define NOLACE_NUMBITS_SCALE_6 8.114998817443848f
|
||||
#define NOLACE_NUMBITS_SCALE_7 8.77063274383545f
|
||||
|
||||
#define NOLACE_PITCH_EMBEDDING_OUT_SIZE 64
|
||||
|
||||
#define NOLACE_FNET_CONV1_OUT_SIZE 96
|
||||
|
||||
#define NOLACE_FNET_CONV1_IN_SIZE 173
|
||||
|
||||
#define NOLACE_FNET_CONV1_STATE_SIZE (173 * (0))
|
||||
|
||||
#define NOLACE_FNET_CONV1_DELAY 0
|
||||
|
||||
#define NOLACE_FNET_CONV2_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_FNET_CONV2_IN_SIZE 384
|
||||
|
||||
#define NOLACE_FNET_CONV2_STATE_SIZE (384 * (1))
|
||||
|
||||
#define NOLACE_FNET_CONV2_DELAY 0
|
||||
|
||||
#define NOLACE_FNET_TCONV_KERNEL_SIZE 4
|
||||
|
||||
#define NOLACE_FNET_TCONV_STRIDE 4
|
||||
|
||||
#define NOLACE_FNET_TCONV_IN_CHANNELS 160
|
||||
|
||||
#define NOLACE_FNET_TCONV_OUT_CHANNELS 160
|
||||
|
||||
#define NOLACE_FNET_GRU_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_FNET_GRU_STATE_SIZE 160
|
||||
|
||||
#define NOLACE_CF1_FILTER_GAIN_A 0.690776f
|
||||
#define NOLACE_CF1_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_CF1_LOG_GAIN_LIMIT 1.151293f
|
||||
#define NOLACE_CF1_KERNEL_SIZE 16
|
||||
#define NOLACE_CF1_LEFT_PADDING 8
|
||||
#define NOLACE_CF1_FRAME_SIZE 80
|
||||
#define NOLACE_CF1_OVERLAP_SIZE 40
|
||||
#define NOLACE_CF1_IN_CHANNELS 1
|
||||
#define NOLACE_CF1_OUT_CHANNELS 1
|
||||
#define NOLACE_CF1_NORM_P 2
|
||||
#define NOLACE_CF1_FEATURE_DIM 160
|
||||
#define NOLACE_CF1_MAX_LAG 301
|
||||
|
||||
#define NOLACE_CF1_KERNEL_OUT_SIZE 16
|
||||
|
||||
#define NOLACE_CF1_GAIN_OUT_SIZE 1
|
||||
|
||||
#define NOLACE_CF1_GLOBAL_GAIN_OUT_SIZE 1
|
||||
|
||||
#define NOLACE_CF2_FILTER_GAIN_A 0.690776f
|
||||
#define NOLACE_CF2_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_CF2_LOG_GAIN_LIMIT 1.151293f
|
||||
#define NOLACE_CF2_KERNEL_SIZE 16
|
||||
#define NOLACE_CF2_LEFT_PADDING 8
|
||||
#define NOLACE_CF2_FRAME_SIZE 80
|
||||
#define NOLACE_CF2_OVERLAP_SIZE 40
|
||||
#define NOLACE_CF2_IN_CHANNELS 1
|
||||
#define NOLACE_CF2_OUT_CHANNELS 1
|
||||
#define NOLACE_CF2_NORM_P 2
|
||||
#define NOLACE_CF2_FEATURE_DIM 160
|
||||
#define NOLACE_CF2_MAX_LAG 301
|
||||
|
||||
#define NOLACE_CF2_KERNEL_OUT_SIZE 16
|
||||
|
||||
#define NOLACE_CF2_GAIN_OUT_SIZE 1
|
||||
|
||||
#define NOLACE_CF2_GLOBAL_GAIN_OUT_SIZE 1
|
||||
|
||||
#define NOLACE_AF1_FILTER_GAIN_A 1.381551f
|
||||
#define NOLACE_AF1_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_AF1_SHAPE_GAIN 1.000000f
|
||||
#define NOLACE_AF1_KERNEL_SIZE 16
|
||||
#define NOLACE_AF1_FRAME_SIZE 80
|
||||
#define NOLACE_AF1_LEFT_PADDING 15
|
||||
#define NOLACE_AF1_OVERLAP_SIZE 40
|
||||
#define NOLACE_AF1_IN_CHANNELS 1
|
||||
#define NOLACE_AF1_OUT_CHANNELS 2
|
||||
#define NOLACE_AF1_NORM_P 2
|
||||
#define NOLACE_AF1_FEATURE_DIM 160
|
||||
|
||||
#define NOLACE_AF1_KERNEL_OUT_SIZE 32
|
||||
|
||||
#define NOLACE_AF1_GAIN_OUT_SIZE 2
|
||||
|
||||
#define NOLACE_TDSHAPE1_FEATURE_DIM 160
|
||||
#define NOLACE_TDSHAPE1_FRAME_SIZE 80
|
||||
#define NOLACE_TDSHAPE1_AVG_POOL_K 4
|
||||
#define NOLACE_TDSHAPE1_INNOVATE 0
|
||||
#define NOLACE_TDSHAPE1_POOL_AFTER 0
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_F_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_F_IN_SIZE 160
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_F_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_F_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_T_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_T_IN_SIZE 21
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_T_STATE_SIZE (21 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA1_T_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA2_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA2_IN_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA2_STATE_SIZE (80 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE1_ALPHA2_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE2_FEATURE_DIM 160
|
||||
#define NOLACE_TDSHAPE2_FRAME_SIZE 80
|
||||
#define NOLACE_TDSHAPE2_AVG_POOL_K 4
|
||||
#define NOLACE_TDSHAPE2_INNOVATE 0
|
||||
#define NOLACE_TDSHAPE2_POOL_AFTER 0
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_F_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_F_IN_SIZE 160
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_F_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_F_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_T_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_T_IN_SIZE 21
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_T_STATE_SIZE (21 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA1_T_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA2_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA2_IN_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA2_STATE_SIZE (80 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE2_ALPHA2_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE3_FEATURE_DIM 160
|
||||
#define NOLACE_TDSHAPE3_FRAME_SIZE 80
|
||||
#define NOLACE_TDSHAPE3_AVG_POOL_K 4
|
||||
#define NOLACE_TDSHAPE3_INNOVATE 0
|
||||
#define NOLACE_TDSHAPE3_POOL_AFTER 0
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_F_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_F_IN_SIZE 160
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_F_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_F_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_T_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_T_IN_SIZE 21
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_T_STATE_SIZE (21 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA1_T_DELAY 0
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA2_OUT_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA2_IN_SIZE 80
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA2_STATE_SIZE (80 * (1))
|
||||
|
||||
#define NOLACE_TDSHAPE3_ALPHA2_DELAY 0
|
||||
|
||||
#define NOLACE_AF2_FILTER_GAIN_A 1.381551f
|
||||
#define NOLACE_AF2_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_AF2_SHAPE_GAIN 1.000000f
|
||||
#define NOLACE_AF2_KERNEL_SIZE 16
|
||||
#define NOLACE_AF2_FRAME_SIZE 80
|
||||
#define NOLACE_AF2_LEFT_PADDING 15
|
||||
#define NOLACE_AF2_OVERLAP_SIZE 40
|
||||
#define NOLACE_AF2_IN_CHANNELS 2
|
||||
#define NOLACE_AF2_OUT_CHANNELS 2
|
||||
#define NOLACE_AF2_NORM_P 2
|
||||
#define NOLACE_AF2_FEATURE_DIM 160
|
||||
|
||||
#define NOLACE_AF2_KERNEL_OUT_SIZE 64
|
||||
|
||||
#define NOLACE_AF2_GAIN_OUT_SIZE 2
|
||||
|
||||
#define NOLACE_AF3_FILTER_GAIN_A 1.381551f
|
||||
#define NOLACE_AF3_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_AF3_SHAPE_GAIN 1.000000f
|
||||
#define NOLACE_AF3_KERNEL_SIZE 16
|
||||
#define NOLACE_AF3_FRAME_SIZE 80
|
||||
#define NOLACE_AF3_LEFT_PADDING 15
|
||||
#define NOLACE_AF3_OVERLAP_SIZE 40
|
||||
#define NOLACE_AF3_IN_CHANNELS 2
|
||||
#define NOLACE_AF3_OUT_CHANNELS 2
|
||||
#define NOLACE_AF3_NORM_P 2
|
||||
#define NOLACE_AF3_FEATURE_DIM 160
|
||||
|
||||
#define NOLACE_AF3_KERNEL_OUT_SIZE 64
|
||||
|
||||
#define NOLACE_AF3_GAIN_OUT_SIZE 2
|
||||
|
||||
#define NOLACE_AF4_FILTER_GAIN_A 1.381551f
|
||||
#define NOLACE_AF4_FILTER_GAIN_B 0.000000f
|
||||
#define NOLACE_AF4_SHAPE_GAIN 1.000000f
|
||||
#define NOLACE_AF4_KERNEL_SIZE 16
|
||||
#define NOLACE_AF4_FRAME_SIZE 80
|
||||
#define NOLACE_AF4_LEFT_PADDING 15
|
||||
#define NOLACE_AF4_OVERLAP_SIZE 40
|
||||
#define NOLACE_AF4_IN_CHANNELS 2
|
||||
#define NOLACE_AF4_OUT_CHANNELS 1
|
||||
#define NOLACE_AF4_NORM_P 2
|
||||
#define NOLACE_AF4_FEATURE_DIM 160
|
||||
|
||||
#define NOLACE_AF4_KERNEL_OUT_SIZE 32
|
||||
|
||||
#define NOLACE_AF4_GAIN_OUT_SIZE 1
|
||||
|
||||
#define NOLACE_POST_CF1_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_POST_CF1_IN_SIZE 160
|
||||
|
||||
#define NOLACE_POST_CF1_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_POST_CF1_DELAY 0
|
||||
|
||||
#define NOLACE_POST_CF2_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_POST_CF2_IN_SIZE 160
|
||||
|
||||
#define NOLACE_POST_CF2_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_POST_CF2_DELAY 0
|
||||
|
||||
#define NOLACE_POST_AF1_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF1_IN_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF1_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_POST_AF1_DELAY 0
|
||||
|
||||
#define NOLACE_POST_AF2_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF2_IN_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF2_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_POST_AF2_DELAY 0
|
||||
|
||||
#define NOLACE_POST_AF3_OUT_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF3_IN_SIZE 160
|
||||
|
||||
#define NOLACE_POST_AF3_STATE_SIZE (160 * (1))
|
||||
|
||||
#define NOLACE_POST_AF3_DELAY 0
|
||||
|
||||
/* Collection of all layers of the NoLACE model, one LinearLayer per named
   layer. NOTE(review): presumably filled in by init_nolacelayers() -- confirm
   against its definition. */
typedef struct {
|
||||
/* Pitch embedding and feature network (fnet) layers. */
LinearLayer nolace_pitch_embedding;
|
||||
LinearLayer nolace_fnet_conv1;
|
||||
LinearLayer nolace_fnet_conv2;
|
||||
LinearLayer nolace_fnet_tconv;
|
||||
LinearLayer nolace_fnet_gru_input;
|
||||
LinearLayer nolace_fnet_gru_recurrent;
|
||||
/* cf1 / cf2 blocks: kernel, gain and global gain layers. */
LinearLayer nolace_cf1_kernel;
|
||||
LinearLayer nolace_cf1_gain;
|
||||
LinearLayer nolace_cf1_global_gain;
|
||||
LinearLayer nolace_cf2_kernel;
|
||||
LinearLayer nolace_cf2_gain;
|
||||
LinearLayer nolace_cf2_global_gain;
|
||||
/* af1 block: kernel and gain layers. */
LinearLayer nolace_af1_kernel;
|
||||
LinearLayer nolace_af1_gain;
|
||||
/* tdshape1..3 blocks: alpha1_f, alpha1_t and alpha2 layers each. */
LinearLayer nolace_tdshape1_alpha1_f;
|
||||
LinearLayer nolace_tdshape1_alpha1_t;
|
||||
LinearLayer nolace_tdshape1_alpha2;
|
||||
LinearLayer nolace_tdshape2_alpha1_f;
|
||||
LinearLayer nolace_tdshape2_alpha1_t;
|
||||
LinearLayer nolace_tdshape2_alpha2;
|
||||
LinearLayer nolace_tdshape3_alpha1_f;
|
||||
LinearLayer nolace_tdshape3_alpha1_t;
|
||||
LinearLayer nolace_tdshape3_alpha2;
|
||||
/* af2..af4 blocks: kernel and gain layers each. */
LinearLayer nolace_af2_kernel;
|
||||
LinearLayer nolace_af2_gain;
|
||||
LinearLayer nolace_af3_kernel;
|
||||
LinearLayer nolace_af3_gain;
|
||||
LinearLayer nolace_af4_kernel;
|
||||
LinearLayer nolace_af4_gain;
|
||||
/* post_* layers, one per cf/af block except af4. */
LinearLayer nolace_post_cf1;
|
||||
LinearLayer nolace_post_cf2;
|
||||
LinearLayer nolace_post_af1;
|
||||
LinearLayer nolace_post_af2;
|
||||
LinearLayer nolace_post_af3;
|
||||
} NOLACELayers;
|
||||
|
||||
/* Initialises `model` from the weight arrays in `arrays`.
   NOTE(review): the meaning of the int return value is not visible in this
   header -- confirm against the definition. */
int init_nolacelayers(NOLACELayers *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* NOLACE_DATA_H */
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,84 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_H
|
||||
#define OSCE_H
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
/*#include "osce_config.h"*/
|
||||
#ifndef DISABLE_LACE
|
||||
#include "lace_data.h"
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
#include "nolace_data.h"
|
||||
#endif
|
||||
#include "nndsp.h"
|
||||
#include "nnet.h"
|
||||
#include "osce_structs.h"
|
||||
#include "structs.h"
|
||||
|
||||
#define OSCE_METHOD_NONE 0
|
||||
#ifndef DISABLE_LACE
|
||||
#define OSCE_METHOD_LACE 1
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
#define OSCE_METHOD_NOLACE 2
|
||||
#endif
|
||||
|
||||
/* Default enhancement method: NoLACE when it is compiled in, otherwise LACE,
   otherwise none. OSCE_MAX_RNN_NEURONS follows the GRU state size of the
   selected default only.
   NOTE(review): when both models are enabled this assumes the NoLACE GRU
   state is at least as large as the LACE one -- confirm. */
#if !defined(DISABLE_NOLACE)
|
||||
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
|
||||
#define OSCE_MAX_RNN_NEURONS NOLACE_FNET_GRU_STATE_SIZE
|
||||
#elif !defined(DISABLE_LACE)
|
||||
#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
|
||||
#define OSCE_MAX_RNN_NEURONS LACE_FNET_GRU_STATE_SIZE
|
||||
#else
|
||||
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
|
||||
#define OSCE_MAX_RNN_NEURONS 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/* API */
|
||||
|
||||
|
||||
/* Enhances one frame of decoded speech; xq is both input and output.
   Parameter directions follow the I / I/O markers below. */
void osce_enhance_frame(
|
||||
OSCEModel *model, /* I OSCE model struct */
|
||||
silk_decoder_state *psDec, /* I/O Decoder state */
|
||||
silk_decoder_control *psDecCtrl, /* I Decoder control */
|
||||
opus_int16 xq[], /* I/O Decoded speech */
|
||||
opus_int32 num_bits, /* I Size of SILK payload in bits */
|
||||
int arch /* I Run-time architecture */
|
||||
);
|
||||
|
||||
|
||||
/* Loads the OSCE models into hModel from the `len`-byte buffer `data`.
   NOTE(review): return convention is not visible here -- confirm. */
int osce_load_models(OSCEModel *hModel, const void *data, int len);
|
||||
/* Resets the OSCE state; `method` is presumably one of OSCE_METHOD_*. */
void osce_reset(silk_OSCE_struct *hOSCE, int method);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_CONFIG
|
||||
#define OSCE_CONFIG
|
||||
|
||||
#define OSCE_FEATURES_MAX_HISTORY 350
|
||||
#define OSCE_FEATURE_DIM 93
|
||||
#define OSCE_MAX_FEATURE_FRAMES 4
|
||||
|
||||
#define OSCE_CLEAN_SPEC_NUM_BANDS 64
|
||||
#define OSCE_NOISY_SPEC_NUM_BANDS 18
|
||||
|
||||
#define OSCE_NO_PITCH_VALUE 7
|
||||
|
||||
#define OSCE_PREEMPH 0.85f
|
||||
|
||||
#define OSCE_PITCH_HANGOVER 0
|
||||
|
||||
#define OSCE_CLEAN_SPEC_START 0
|
||||
#define OSCE_CLEAN_SPEC_LENGTH 64
|
||||
|
||||
#define OSCE_NOISY_CEPSTRUM_START 64
|
||||
#define OSCE_NOISY_CEPSTRUM_LENGTH 18
|
||||
|
||||
#define OSCE_ACORR_START 82
|
||||
#define OSCE_ACORR_LENGTH 5
|
||||
|
||||
#define OSCE_LTP_START 87
|
||||
#define OSCE_LTP_LENGTH 5
|
||||
|
||||
#define OSCE_LOG_GAIN_START 92
|
||||
#define OSCE_LOG_GAIN_LENGTH 1
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,454 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define OSCE_SPEC_WINDOW_SIZE 320
|
||||
#define OSCE_SPEC_NUM_FREQS 161
|
||||
|
||||
|
||||
/*DEBUG*/
|
||||
/*#define WRITE_FEATURES*/
|
||||
/*#define DEBUG_PRING*/
|
||||
/*******/
|
||||
|
||||
#include "stack_alloc.h"
|
||||
#include "osce_features.h"
|
||||
#include "kiss_fft.h"
|
||||
#include "os_support.h"
|
||||
#include "osce.h"
|
||||
#include "freq.h"
|
||||
|
||||
|
||||
#if defined(WRITE_FEATURES) || defined(DEBUG_PRING)
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
/* One entry per clean-spectrum band (64 bands), in ascending order from 0 to
   160. NOTE(review): presumably the centre frequency-bin index of each band
   within the OSCE_SPEC_NUM_FREQS (161) bins -- confirm against the code that
   uses it. */
static const int center_bins_clean[64] = {
|
||||
0, 2, 5, 8, 10, 12, 15, 18,
|
||||
20, 22, 25, 28, 30, 33, 35, 38,
|
||||
40, 42, 45, 48, 50, 52, 55, 58,
|
||||
60, 62, 65, 68, 70, 73, 75, 78,
|
||||
80, 82, 85, 88, 90, 92, 95, 98,
|
||||
100, 102, 105, 108, 110, 112, 115, 118,
|
||||
120, 122, 125, 128, 130, 132, 135, 138,
|
||||
140, 142, 145, 148, 150, 152, 155, 160
|
||||
};
|
||||
|
||||
/* One entry per noisy-spectrum band (18 bands), in ascending order from 0 to
   160 with spacing that grows towards the upper end. NOTE(review): presumably
   the centre frequency-bin index of each band -- confirm against the code
   that uses it. */
static const int center_bins_noisy[18] = {
|
||||
0, 4, 8, 12, 16, 20, 24, 28,
|
||||
32, 40, 48, 56, 64, 80, 96, 112,
|
||||
136, 160
|
||||
};
|
||||
|
||||
/* One weight per entry of center_bins_clean (64 values).
   NOTE(review): presumably a per-band normalisation factor applied to the
   band energies -- confirm against the code that uses it. */
static const float band_weights_clean[64] = {
|
||||
0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
|
||||
0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
|
||||
};
|
||||
|
||||
/* One weight per entry of center_bins_noisy (18 values).
   NOTE(review): presumably a per-band normalisation factor applied to the
   band energies -- confirm against the code that uses it. */
static const float band_weights_noisy[18] = {
|
||||
0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
|
||||
0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
|
||||
0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
|
||||
0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
|
||||
0.041666666667f, 0.080000000000f
|
||||
};
|
||||
|
||||
/* Symmetric analysis window with OSCE_SPEC_WINDOW_SIZE (320) taps; the tabulated
 * values match sin(pi * (n + 0.5) / 320).  calculate_cepstrum() applies the
 * whole window, osce_cross_fade_10ms() uses the rising first half as a fade-in
 * ramp.  The table is only ever read, hence const. */
static const float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
    0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
    0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
    0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
    0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
    0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
    0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
    0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
    0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
    0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
    0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
    0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
    0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
    0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
    0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
    0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
    0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
    0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
    0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
    0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
    0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
    0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
    0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
    0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
    0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
    0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
    0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
    0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
    0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
    0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
    0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
    0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
    0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
    0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
    0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
    0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
    0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
    0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
    0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
    0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
    0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
    0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
    0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
    0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
    0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
    0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
    0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
    0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
    0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
    0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
    0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
    0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
    0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
    0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
    0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
    0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
    0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
    0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
    0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
    0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
    0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
    0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
    0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
    0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
    0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
};
|
||||
|
||||
/*
 * Collapse a spectrum into num_bands band values using triangular,
 * half-overlapping bands.
 *
 * Every bin i in [center_bins[b], center_bins[b+1]) is split linearly between
 * band b and band b+1 according to its distance from the two band centres and
 * scaled by the respective band weight.  The bin sitting on the last centre is
 * added to the last band only.
 *
 *   x_out         O  num_bands accumulated band values
 *   x_in          I  spectrum, read up to index center_bins[num_bands-1]
 *   center_bins   I  num_bands band-centre indices
 *   band_weights  I  num_bands per-band gains
 *   num_bands     I  number of bands
 *
 * x_out must not be the same pointer as x_in.
 */
static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
{
    const int last = num_bands - 1;
    int band, bin;

    celt_assert(x_in != x_out);

    x_out[0] = 0;
    for (band = 0; band < last; band++)
    {
        const int lo = center_bins[band];
        const int hi = center_bins[band + 1];

        /* the upper neighbour starts accumulating with this segment */
        x_out[band + 1] = 0;

        bin = lo;
        while (bin < hi)
        {
            /* share of this bin that belongs to the lower band */
            float frac = (float) (hi - bin) / (hi - lo);

            x_out[band] += band_weights[band] * frac * x_in[bin];
            x_out[band + 1] += band_weights[band + 1] * (1 - frac) * x_in[bin];
            bin++;
        }
    }

    /* bin located exactly on the last band centre */
    x_out[last] += band_weights[last] * x_in[center_bins[last]];

#ifdef DEBUG_PRINT
    for (band = 0; band < num_bands; band++)
    {
        printf("band[%d]: %f\n", band, x_out[band]);
    }
#endif
}
|
||||
|
||||
|
||||
static void mag_spec_320_onesided(float *out, float *in)
|
||||
{
|
||||
celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
|
||||
kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
|
||||
int k;
|
||||
forward_transform(buffer, in);
|
||||
|
||||
for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
|
||||
{
|
||||
out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
|
||||
#ifdef DEBUG_PRINT
|
||||
printf("magspec[%d]: %f\n", k, out[k]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Log band spectrum of the LPC synthesis filter.
 *
 * Builds the zero-padded coefficient sequence [1, -a_1, ..., -a_p], takes its
 * magnitude spectrum, inverts it bin by bin, pools the result with the "clean"
 * filterbank and returns 0.3 * log() of every band.
 *
 *   spec       O  OSCE_CLEAN_SPEC_NUM_BANDS log band values
 *   a_q12      I  lpc_order prediction coefficients in Q12
 *   lpc_order  I  number of coefficients
 */
static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
{
    float work[OSCE_SPEC_WINDOW_SIZE] = {0};
    int idx;

    /* coefficient sequence of the prediction-error filter; the rest of the
       buffer stays zero */
    work[0] = 1;
    for (idx = 1; idx <= lpc_order; idx++)
    {
        work[idx] = - (float)a_q12[idx - 1] / (1U << 12);
    }

    /* magnitude response of the error filter ... */
    mag_spec_320_onesided(work, work);

    /* ... inverted (with a small floor against division by zero) */
    for (idx = 0; idx < OSCE_SPEC_NUM_FREQS; idx++)
    {
        work[idx] = 1.f / (work[idx] + 1e-9f);
    }

    /* band pooling */
    apply_filterbank(spec, work, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);

    /* compression to the log domain and scaling */
    for (idx = 0; idx < OSCE_CLEAN_SPEC_NUM_BANDS; idx++)
    {
        spec[idx] = 0.3f * log(spec[idx] + 1e-9f);
    }
}
|
||||
|
||||
static void calculate_cepstrum(float *cepstrum, float *signal)
|
||||
{
|
||||
float buffer[OSCE_SPEC_WINDOW_SIZE];
|
||||
float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
|
||||
int n;
|
||||
|
||||
celt_assert(cepstrum != signal)
|
||||
|
||||
for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
|
||||
{
|
||||
buffer[n] = osce_window[n] * signal[n];
|
||||
}
|
||||
|
||||
/* calculate magnitude spectrum */
|
||||
mag_spec_320_onesided(buffer, buffer);
|
||||
|
||||
/* accumulate bands */
|
||||
apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
|
||||
|
||||
/* log domain conversion */
|
||||
for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
|
||||
{
|
||||
spec[n] = log(spec[n] + 1e-9f);
|
||||
#ifdef DEBUG_PRINT
|
||||
printf("logspec[%d]: %f\n", n, spec[n]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* DCT-II (orthonormal) */
|
||||
celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
|
||||
dct(cepstrum, spec);
|
||||
}
|
||||
|
||||
/*
 * Normalised cross-correlation between the current 80-sample subframe and the
 * signal delayed by lag-2 ... lag+2 samples.
 *
 *   acorr   O  5 values; acorr[k+2] belongs to delay (lag - k), k = -2..2
 *   signal  I  start of the subframe; samples back to signal[-(lag + 2)]
 *              are read, so the caller has to provide that much history
 *   lag     I  pitch lag in samples
 *
 * acorr must not be the same pointer as signal.
 */
static void calculate_acorr(float *acorr, float *signal, int lag)
{
    int n, k;
    float xx, xy, yy;

    celt_assert(acorr != signal);

    /* the energy of the current subframe does not depend on the lag offset,
       so it is accumulated once instead of once per offset */
    xx = 0;
    for (n = 0; n < 80; n++)
    {
        xx += signal[n] * signal[n];
    }

    for (k = -2; k <= 2; k++)
    {
        const float *delayed = signal - lag + k;

        xy = 0;
        yy = 0;
        for (n = 0; n < 80; n++)
        {
            yy += delayed[n] * delayed[n];
            xy += signal[n] * delayed[n];
        }

        /* small bias keeps the division finite for all-zero input */
        acorr[k + 2] = xy / sqrt(xx * yy + 1e-9f);
    }
}
|
||||
|
||||
/*
 * Choose the pitch lag reported for one subframe and update the hangover
 * state kept in psFeatures.
 *
 *   psFeatures  I/O  last_lag, last_type and pitch_hangover_count are updated
 *   lag         I    decoded pitch lag of the subframe
 *   type        I    signal type of the frame
 *
 * Voiced frames return lag unchanged and remember it.  Non-voiced frames
 * return OSCE_NO_PITCH_VALUE, unless OSCE_HANGOVER_BUGFIX is defined, in which
 * case the last voiced lag is repeated during the hangover period.
 */
static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
{
    int new_lag;
    int modulus;

#ifdef OSCE_HANGOVER_BUGFIX
#define TESTBIT 1
#else
#define TESTBIT 0
#endif

    /* never take a modulo by zero when the hangover length is 0 */
    modulus = OSCE_PITCH_HANGOVER;
    if (modulus == 0)
    {
        modulus = 1;
    }

    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
    if (type == TYPE_VOICED)
    {
        /* voiced frame: pass the lag through and remember it */
        new_lag = lag;
        psFeatures->last_lag = lag;
        psFeatures->pitch_hangover_count = 0;
    }
    else if (TESTBIT && psFeatures->last_type == TYPE_VOICED)
    {
        /* first non-voiced frame after a voiced one: enter hangover */
        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
        {
            new_lag = psFeatures->last_lag;
            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
        }
        else
        {
            new_lag = OSCE_NO_PITCH_VALUE;
        }
    }
    else if (TESTBIT && psFeatures->pitch_hangover_count)
    {
        /* hangover still running */
        new_lag = psFeatures->last_lag;
        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
    }
    else
    {
        /* non-voiced frame outside of any hangover */
        new_lag = OSCE_NO_PITCH_VALUE;
        psFeatures->pitch_hangover_count = 0;
    }

    /* buffer update */
    psFeatures->last_type = type;

    /* with the current setup this should never happen (but who knows...) */
    celt_assert(new_lag);

    return new_lag;
}
|
||||
|
||||
void osce_calculate_features(
|
||||
silk_decoder_state *psDec, /* I/O Decoder state */
|
||||
silk_decoder_control *psDecCtrl, /* I Decoder control */
|
||||
float *features, /* O input features */
|
||||
float *numbits, /* O numbits and smoothed numbits */
|
||||
int *periods, /* O pitch lags on subframe basis */
|
||||
const opus_int16 xq[], /* I Decoded speech */
|
||||
opus_int32 num_bits /* I Size of SILK payload in bits */
|
||||
)
|
||||
{
|
||||
int num_subframes, num_samples;
|
||||
float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
|
||||
float *frame, *pfeatures;
|
||||
OSCEFeatureState *psFeatures;
|
||||
int i, n, k;
|
||||
#ifdef WRITE_FEATURES
|
||||
static FILE *f_feat = NULL;
|
||||
if (f_feat == NULL)
|
||||
{
|
||||
f_feat = fopen("assembled_features.f32", "wb");
|
||||
}
|
||||
#endif
|
||||
|
||||
/*OPUS_CLEAR(buffer, 1);*/
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
|
||||
num_subframes = psDec->nb_subfr;
|
||||
num_samples = num_subframes * 80;
|
||||
psFeatures = &psDec->osce.features;
|
||||
|
||||
/* smooth bit count */
|
||||
psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
|
||||
numbits[0] = num_bits;
|
||||
numbits[1] = psFeatures->numbits_smooth;
|
||||
|
||||
for (n = 0; n < num_samples; n++)
|
||||
{
|
||||
buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
|
||||
}
|
||||
OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
|
||||
|
||||
for (k = 0; k < num_subframes; k++)
|
||||
{
|
||||
pfeatures = features + k * OSCE_FEATURE_DIM;
|
||||
frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
|
||||
memset(pfeatures, 0, OSCE_FEATURE_DIM); /* precaution */
|
||||
|
||||
/* clean spectrum from lpcs (update every other frame) */
|
||||
if (k % 2 == 0)
|
||||
{
|
||||
calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
|
||||
}
|
||||
else
|
||||
{
|
||||
OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
|
||||
}
|
||||
|
||||
/* noisy cepstrum from signal (update every other frame) */
|
||||
if (k % 2 == 0)
|
||||
{
|
||||
calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
|
||||
}
|
||||
else
|
||||
{
|
||||
OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
|
||||
}
|
||||
|
||||
/* pitch hangover and zero value replacement */
|
||||
periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
|
||||
|
||||
/* auto-correlation around pitch lag */
|
||||
calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);
|
||||
|
||||
/* ltp */
|
||||
celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
|
||||
for (i = 0; i < OSCE_LTP_LENGTH; i++)
|
||||
{
|
||||
pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
|
||||
}
|
||||
|
||||
/* frame gain */
|
||||
pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);
|
||||
|
||||
#ifdef WRITE_FEATURES
|
||||
fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* buffer update */
|
||||
OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
|
||||
}
|
||||
|
||||
|
||||
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
|
||||
{
|
||||
int i;
|
||||
celt_assert(length >= 160);
|
||||
|
||||
for (i = 0; i < 160; i++)
|
||||
{
|
||||
x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i];
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_FEATURES_H
|
||||
#define OSCE_FEATURES_H
|
||||
|
||||
|
||||
#include "structs.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
#define OSCE_NUMBITS_BUGFIX
|
||||
|
||||
void osce_calculate_features(
|
||||
silk_decoder_state *psDec, /* I/O Decoder state */
|
||||
silk_decoder_control *psDecCtrl, /* I Decoder control */
|
||||
float *features, /* O input features */
|
||||
float *numbits, /* O numbits and smoothed numbits */
|
||||
int *periods, /* O pitch lags on subframe basis */
|
||||
const opus_int16 xq[], /* I Decoded speech */
|
||||
opus_int32 num_bits /* I Size of SILK payload in bits */
|
||||
);
|
||||
|
||||
|
||||
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_STRUCTS_H
|
||||
#define OSCE_STRUCTS_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "osce_config.h"
|
||||
#ifndef DISABLE_LACE
|
||||
#include "lace_data.h"
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
#include "nolace_data.h"
|
||||
#endif
|
||||
#include "nndsp.h"
|
||||
#include "nnet.h"
|
||||
|
||||
/* feature calculation */
|
||||
|
||||
typedef struct {
|
||||
float numbits_smooth;
|
||||
int pitch_hangover_count;
|
||||
int last_lag;
|
||||
int last_type;
|
||||
float signal_history[OSCE_FEATURES_MAX_HISTORY];
|
||||
int reset;
|
||||
} OSCEFeatureState;
|
||||
|
||||
|
||||
#ifndef DISABLE_LACE
|
||||
/* LACE */
|
||||
typedef struct {
|
||||
float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
|
||||
float feature_net_gru_state[LACE_COND_DIM];
|
||||
AdaCombState cf1_state;
|
||||
AdaCombState cf2_state;
|
||||
AdaConvState af1_state;
|
||||
float preemph_mem;
|
||||
float deemph_mem;
|
||||
} LACEState;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
LACELayers layers;
|
||||
float window[LACE_OVERLAP_SIZE];
|
||||
} LACE;
|
||||
|
||||
#endif /* #ifndef DISABLE_LACE */
|
||||
|
||||
|
||||
#ifndef DISABLE_NOLACE
|
||||
/* NoLACE */
|
||||
typedef struct {
|
||||
float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
|
||||
float feature_net_gru_state[NOLACE_COND_DIM];
|
||||
float post_cf1_state[NOLACE_COND_DIM];
|
||||
float post_cf2_state[NOLACE_COND_DIM];
|
||||
float post_af1_state[NOLACE_COND_DIM];
|
||||
float post_af2_state[NOLACE_COND_DIM];
|
||||
float post_af3_state[NOLACE_COND_DIM];
|
||||
AdaCombState cf1_state;
|
||||
AdaCombState cf2_state;
|
||||
AdaConvState af1_state;
|
||||
AdaConvState af2_state;
|
||||
AdaConvState af3_state;
|
||||
AdaConvState af4_state;
|
||||
AdaShapeState tdshape1_state;
|
||||
AdaShapeState tdshape2_state;
|
||||
AdaShapeState tdshape3_state;
|
||||
float preemph_mem;
|
||||
float deemph_mem;
|
||||
} NoLACEState;
|
||||
|
||||
typedef struct {
|
||||
NOLACELayers layers;
|
||||
float window[LACE_OVERLAP_SIZE];
|
||||
} NoLACE;
|
||||
|
||||
#endif /* #ifndef DISABLE_NOLACE */
|
||||
|
||||
/* OSCEModel */
|
||||
typedef struct {
|
||||
int loaded;
|
||||
#ifndef DISABLE_LACE
|
||||
LACE lace;
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
NoLACE nolace;
|
||||
#endif
|
||||
} OSCEModel;
|
||||
|
||||
typedef union {
|
||||
#ifndef DISABLE_LACE
|
||||
LACEState lace;
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
NoLACEState nolace;
|
||||
#endif
|
||||
} OSCEState;
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "nnet.h"
|
||||
#include "os_support.h"
|
||||
|
||||
#define SPARSE_BLOCK_SIZE 32
|
||||
|
||||
/* Parse one weight record at *data.
 *
 * A record is a WeightHead of WEIGHT_BLOCK_SIZE bytes followed by block_size
 * payload bytes.  On success `array` is filled with pointers into the blob (no
 * copy is made), *data and *len are advanced past the record and the payload
 * size is returned.  Returns -1 without touching the outputs if the header is
 * truncated or inconsistent. */
int parse_record(const void **data, int *len, WeightArray *array) {
  WeightHead *head = (WeightHead *)*data;
  int consumed;

  /* the header itself has to fit before any of its fields is looked at */
  if (*len < WEIGHT_BLOCK_SIZE) return -1;

  if (head->size < 0
      || head->block_size < head->size                /* payload larger than its block */
      || head->block_size > *len-WEIGHT_BLOCK_SIZE    /* block runs past the end of the blob */
      || head->name[sizeof(head->name)-1] != 0) {     /* name is not NUL terminated */
    return -1;
  }

  array->name = head->name;
  array->type = head->type;
  array->size = head->size;
  array->data = (void*)((unsigned char*)(*data)+WEIGHT_BLOCK_SIZE);

  consumed = head->block_size+WEIGHT_BLOCK_SIZE;
  *data = (void*)((unsigned char*)*data + consumed);
  *len -= consumed;
  return array->size;
}
|
||||
|
||||
/* Parse a whole weight blob into a newly allocated array of WeightArray.
 *
 * On success *list holds the parsed entries followed by a terminating entry
 * whose name is NULL, and the number of entries is returned; the caller
 * releases the list with opus_free().  The entries point into `data`, which
 * therefore has to stay valid for as long as they are used.
 *
 * Returns -1 with *list set to NULL if a record is malformed or empty, or if
 * memory cannot be allocated. */
int parse_weights(WeightArray **list, const void *data, int len)
{
  const WeightArray terminator = {NULL, 0, 0, 0};
  int nb_arrays=0;
  int capacity=20;
  *list = opus_alloc(capacity*sizeof(WeightArray));
  if (*list == NULL) return -1;
  while (len > 0) {
    int ret;
    WeightArray array = {NULL, 0, 0, 0};
    ret = parse_record(&data, &len, &array);
    if (ret <= 0) {
      opus_free(*list);
      *list = NULL;
      return -1;
    }
    if (nb_arrays+1 >= capacity) {
      WeightArray *grown;
      /* Make sure there's room for the ending NULL element too. */
      capacity = capacity*3/2;
      /* keep the old pointer until the reallocation is known to have worked */
      grown = opus_realloc(*list, capacity*sizeof(WeightArray));
      if (grown == NULL) {
        opus_free(*list);
        *list = NULL;
        return -1;
      }
      *list = grown;
    }
    (*list)[nb_arrays++] = array;
  }
  /* fully initialised terminator, so lookups that land on it read defined values */
  (*list)[nb_arrays] = terminator;
  return nb_arrays;
}
|
||||
|
||||
static const void *find_array_entry(const WeightArray *arrays, const char *name) {
|
||||
while (arrays->name && strcmp(arrays->name, name) != 0) arrays++;
|
||||
return arrays;
|
||||
}
|
||||
|
||||
static const void *find_array_check(const WeightArray *arrays, const char *name, int size) {
|
||||
const WeightArray *a = find_array_entry(arrays, name);
|
||||
if (a->name && a->size == size) return a->data;
|
||||
else return NULL;
|
||||
}
|
||||
|
||||
static const void *opt_array_check(const WeightArray *arrays, const char *name, int size, int *error) {
|
||||
const WeightArray *a = find_array_entry(arrays, name);
|
||||
*error = (a->name != NULL && a->size != size);
|
||||
if (a->name && a->size == size) return a->data;
|
||||
else return NULL;
|
||||
}
|
||||
|
||||
static const void *find_idx_check(const WeightArray *arrays, const char *name, int nb_in, int nb_out, int *total_blocks) {
|
||||
int remain;
|
||||
const int *idx;
|
||||
const WeightArray *a = find_array_entry(arrays, name);
|
||||
*total_blocks = 0;
|
||||
if (a == NULL) return NULL;
|
||||
idx = a->data;
|
||||
remain = a->size/sizeof(int);
|
||||
while (remain > 0) {
|
||||
int nb_blocks;
|
||||
int i;
|
||||
nb_blocks = *idx++;
|
||||
if (remain < nb_blocks+1) return NULL;
|
||||
for (i=0;i<nb_blocks;i++) {
|
||||
int pos = *idx++;
|
||||
if (pos+3 >= nb_in || (pos&0x3)) return NULL;
|
||||
}
|
||||
nb_out -= 8;
|
||||
remain -= nb_blocks+1;
|
||||
*total_blocks += nb_blocks;
|
||||
}
|
||||
if (nb_out != 0) return NULL;
|
||||
return a->data;
|
||||
}
|
||||
|
||||
/* Bind the arrays of one linear layer.
 *
 * Each name argument may be NULL, in which case the corresponding pointer in
 * `layer` stays NULL.  bias, subias, weights, weights_idx, diag and scale are
 * mandatory once named: a missing array or a size mismatch fails the call.
 * float_weights is optional: it may be absent from `arrays`, but if present it
 * must have the expected size.  When `weights` is given, `scale` must be given
 * as well.
 *
 * With weights_idx the weight arrays hold SPARSE_BLOCK_SIZE values per indexed
 * block, otherwise nb_inputs*nb_outputs values.
 *
 * Returns 0 on success and 1 on failure; on failure `layer` is left partially
 * initialised. */
int linear_init(LinearLayer *layer, const WeightArray *arrays,
  const char *bias,
  const char *subias,
  const char *weights,
  const char *float_weights,
  const char *weights_idx,
  const char *diag,
  const char *scale,
  int nb_inputs,
  int nb_outputs)
{
  int err;
  int nb_weights;
  layer->bias = NULL;
  layer->subias = NULL;
  layer->weights = NULL;
  layer->float_weights = NULL;
  layer->weights_idx = NULL;
  layer->diag = NULL;
  layer->scale = NULL;
  if (bias != NULL) {
    if ((layer->bias = find_array_check(arrays, bias, nb_outputs*sizeof(layer->bias[0]))) == NULL) return 1;
  }
  if (subias != NULL) {
    if ((layer->subias = find_array_check(arrays, subias, nb_outputs*sizeof(layer->subias[0]))) == NULL) return 1;
  }
  /* number of stored weights: sparse layers keep only the indexed blocks */
  if (weights_idx != NULL) {
    int total_blocks;
    if ((layer->weights_idx = find_idx_check(arrays, weights_idx, nb_inputs, nb_outputs, &total_blocks)) == NULL) return 1;
    nb_weights = SPARSE_BLOCK_SIZE*total_blocks;
  } else {
    nb_weights = nb_inputs*nb_outputs;
  }
  if (weights != NULL) {
    if ((layer->weights = find_array_check(arrays, weights, nb_weights*sizeof(layer->weights[0]))) == NULL) return 1;
  }
  if (float_weights != NULL) {
    layer->float_weights = opt_array_check(arrays, float_weights, nb_weights*sizeof(layer->float_weights[0]), &err);
    if (err) return 1;
  }
  if (diag != NULL) {
    if ((layer->diag = find_array_check(arrays, diag, nb_outputs*sizeof(layer->diag[0]))) == NULL) return 1;
  }
  if (weights != NULL) {
    /* the scale lookup is tied to `weights`; refuse a NULL name instead of
       handing it to strcmp() */
    if (scale == NULL) return 1;
    if ((layer->scale = find_array_check(arrays, scale, nb_outputs*sizeof(layer->scale[0]))) == NULL) return 1;
  }
  layer->nb_inputs = nb_inputs;
  layer->nb_outputs = nb_outputs;
  return 0;
}
|
||||
|
||||
/* Bind the arrays of one 2-D convolution layer.
 *
 * `bias`, when named, must exist with out_channels elements.  `float_weights`,
 * when named, may be absent from `arrays` but must otherwise hold
 * in_channels*out_channels*ktime*kheight elements.
 *
 * Returns 0 on success and 1 on failure. */
int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
  const char *bias,
  const char *float_weights,
  int in_channels,
  int out_channels,
  int ktime,
  int kheight)
{
  int err;

  layer->bias = NULL;
  layer->float_weights = NULL;

  if (bias != NULL) {
    layer->bias = find_array_check(arrays, bias, out_channels*sizeof(layer->bias[0]));
    if (layer->bias == NULL) {
      return 1;
    }
  }

  if (float_weights != NULL) {
    layer->float_weights = opt_array_check(arrays, float_weights, in_channels*out_channels*ktime*kheight*sizeof(layer->float_weights[0]), &err);
    if (err) {
      return 1;
    }
  }

  layer->in_channels = in_channels;
  layer->out_channels = out_channels;
  layer->ktime = ktime;
  layer->kheight = kheight;
  return 0;
}
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
int fd;
|
||||
void *data;
|
||||
int len;
|
||||
int nb_arrays;
|
||||
int i;
|
||||
WeightArray *list;
|
||||
struct stat st;
|
||||
const char *filename = "weights_blob.bin";
|
||||
stat(filename, &st);
|
||||
len = st.st_size;
|
||||
fd = open(filename, O_RDONLY);
|
||||
data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
|
||||
printf("size is %d\n", len);
|
||||
nb_arrays = parse_weights(&list, data, len);
|
||||
for (i=0;i<nb_arrays;i++) {
|
||||
printf("found %s: size %d\n", list[i].name, list[i].size);
|
||||
}
|
||||
printf("%p\n", list[i].name);
|
||||
opus_free(list);
|
||||
munmap(data, len);
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include "pitchdnn.h"
|
||||
#include "os_support.h"
|
||||
#include "nnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
|
||||
|
||||
float compute_pitchdnn(
|
||||
PitchDNNState *st,
|
||||
const float *if_features,
|
||||
const float *xcorr_features,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float if1_out[DENSE_IF_UPSAMPLER_1_OUT_SIZE];
|
||||
float downsampler_in[NB_XCORR_FEATURES + DENSE_IF_UPSAMPLER_2_OUT_SIZE];
|
||||
float downsampler_out[DENSE_DOWNSAMPLER_OUT_SIZE];
|
||||
float conv1_tmp1[(NB_XCORR_FEATURES + 2)*8] = {0};
|
||||
float conv1_tmp2[(NB_XCORR_FEATURES + 2)*8] = {0};
|
||||
float output[DENSE_FINAL_UPSAMPLER_OUT_SIZE];
|
||||
int i;
|
||||
int pos=0;
|
||||
float maxval=-1;
|
||||
float sum=0;
|
||||
float count=0;
|
||||
PitchDNN *model = &st->model;
|
||||
/* IF */
|
||||
compute_generic_dense(&model->dense_if_upsampler_1, if1_out, if_features, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->dense_if_upsampler_2, &downsampler_in[NB_XCORR_FEATURES], if1_out, ACTIVATION_TANH, arch);
|
||||
/* xcorr*/
|
||||
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
||||
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH, arch);
|
||||
compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH, arch);
|
||||
|
||||
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH, arch);
|
||||
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out, arch);
|
||||
compute_generic_dense(&model->dense_final_upsampler, output, st->gru_state, ACTIVATION_LINEAR, arch);
|
||||
for (i=0;i<180;i++) {
|
||||
if (output[i] > maxval) {
|
||||
pos = i;
|
||||
maxval = output[i];
|
||||
}
|
||||
}
|
||||
for (i=IMAX(0, pos-2); i<=IMIN(179, pos+2); i++) {
|
||||
float p = exp(output[i]);
|
||||
sum += p*i;
|
||||
count += p;
|
||||
}
|
||||
/*printf("%d %f\n", pos, sum/count);*/
|
||||
return (1.f/60.f)*(sum/count) - 1.5;
|
||||
/*return 256.f/pow(2.f, (1.f/60.f)*i);*/
|
||||
}
|
||||
|
||||
|
||||
/* Zero the whole state, then, unless USE_WEIGHTS_FILE is defined, load the
   compiled-in pitchdnn_arrays weights into st->model.  A non-zero result from
   init_pitchdnn() trips celt_assert(). */
void pitchdnn_init(PitchDNNState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
#ifndef USE_WEIGHTS_FILE
  ret = init_pitchdnn(&st->model, pitchdnn_arrays);
#endif
  celt_assert(ret == 0);
}
|
||||
|
||||
/* Build st->model from a serialized weight blob.

   data/len - blob handed to parse_weights().
   Returns 0 when the model was initialised, -1 otherwise.

   The weight list is only needed while init_pitchdnn() runs and is released
   before returning. */
int pitchdnn_load_model(PitchDNNState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* NOTE(review): assumes parse_weights() leaves *list NULL when it cannot
     parse the blob - confirm against its implementation.  Without this check
     a failed parse would be passed straight to init_pitchdnn(). */
  if (list == NULL) return -1;
  ret = init_pitchdnn(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef PITCHDNN_H
|
||||
#define PITCHDNN_H
|
||||
|
||||
|
||||
typedef struct PitchDNN PitchDNN;
|
||||
|
||||
#include "pitchdnn_data.h"
|
||||
|
||||
#define PITCH_MIN_PERIOD 32
|
||||
#define PITCH_MAX_PERIOD 256
|
||||
|
||||
#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
|
||||
|
||||
|
||||
typedef struct {
|
||||
PitchDNN model;
|
||||
float gru_state[GRU_1_STATE_SIZE];
|
||||
float xcorr_mem1[(NB_XCORR_FEATURES + 2)*2];
|
||||
float xcorr_mem2[(NB_XCORR_FEATURES + 2)*2*8];
|
||||
float xcorr_mem3[(NB_XCORR_FEATURES + 2)*2*8];
|
||||
} PitchDNNState;
|
||||
|
||||
|
||||
/* Clear *st and, unless USE_WEIGHTS_FILE is defined, load the built-in weights. */
void pitchdnn_init(PitchDNNState *st);

/* Initialise st->model from a weight blob of len bytes; 0 on success, -1 on failure. */
int pitchdnn_load_model(PitchDNNState *st, const void *data, int len);

/* Run the network on one frame of features and return the pitch estimate;
   arch is forwarded to the nnet kernels. */
float compute_pitchdnn(PitchDNNState *st,
                       const float *if_features,
                       const float *xcorr_features,
                       int arch);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,41 @@
|
|||
/* Auto generated from checkpoint pitch_vsmallconv1.pth */
|
||||
|
||||
|
||||
#ifndef PITCHDNN_DATA_H
|
||||
#define PITCHDNN_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#define DENSE_IF_UPSAMPLER_1_OUT_SIZE 64
|
||||
|
||||
#define DENSE_IF_UPSAMPLER_2_OUT_SIZE 64
|
||||
|
||||
#define DENSE_DOWNSAMPLER_OUT_SIZE 64
|
||||
|
||||
#define DENSE_FINAL_UPSAMPLER_OUT_SIZE 192
|
||||
|
||||
#define GRU_1_OUT_SIZE 64
|
||||
|
||||
#define GRU_1_STATE_SIZE 64
|
||||
|
||||
|
||||
#define PITCH_DNN_MAX_RNN_UNITS 64
|
||||
|
||||
|
||||
struct PitchDNN {
|
||||
LinearLayer dense_if_upsampler_1;
|
||||
LinearLayer dense_if_upsampler_2;
|
||||
LinearLayer dense_downsampler;
|
||||
LinearLayer dense_final_upsampler;
|
||||
Conv2dLayer conv2d_1;
|
||||
Conv2dLayer conv2d_2;
|
||||
LinearLayer gru_1_input;
|
||||
LinearLayer gru_1_recurrent;
|
||||
};
|
||||
|
||||
int init_pitchdnn(PitchDNN *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* PITCHDNN_DATA_H */
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,39 @@
|
|||
/* Auto generated from checkpoint plc4ar_16.pth */
|
||||
|
||||
|
||||
#ifndef PLC_DATA_H
|
||||
#define PLC_DATA_H
|
||||
|
||||
#include "nnet.h"
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#define PLC_DENSE_IN_OUT_SIZE 128
|
||||
|
||||
#define PLC_DENSE_OUT_OUT_SIZE 20
|
||||
|
||||
#define PLC_GRU1_OUT_SIZE 192
|
||||
|
||||
#define PLC_GRU1_STATE_SIZE 192
|
||||
|
||||
#define PLC_GRU2_OUT_SIZE 192
|
||||
|
||||
#define PLC_GRU2_STATE_SIZE 192
|
||||
|
||||
|
||||
#define PLC_MAX_RNN_UNITS 192
|
||||
|
||||
|
||||
typedef struct {
|
||||
LinearLayer plc_dense_in;
|
||||
LinearLayer plc_dense_out;
|
||||
LinearLayer plc_gru1_input;
|
||||
LinearLayer plc_gru1_recurrent;
|
||||
LinearLayer plc_gru2_input;
|
||||
LinearLayer plc_gru2_recurrent;
|
||||
} PLCModel;
|
||||
|
||||
int init_plcmodel(PLCModel *model, const WeightArray *arrays);
|
||||
|
||||
#endif /* PLC_DATA_H */
|
||||
|
|
@ -0,0 +1,389 @@
|
|||
/* Copyright (c) 2018 Mozilla
|
||||
2008-2011 Octasic Inc.
|
||||
2012-2017 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef VEC_H
|
||||
#define VEC_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include <math.h>
|
||||
#include "arch.h"
|
||||
#include "x86/x86_arch_macros.h"
|
||||
|
||||
|
||||
#if defined(__AVX__) || defined(__SSE2__)
|
||||
#include "vec_avx.h"
|
||||
#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && !defined(DISABLE_NEON)
|
||||
#include "vec_neon.h"
|
||||
#else
|
||||
|
||||
#include "os_support.h"
|
||||
|
||||
#define MAX_INPUTS (2048)
|
||||
|
||||
#define NO_OPTIMIZATIONS
|
||||
|
||||
/* out = W*x for a float matrix stored column by column: element (row r,
   column j) is weights[j*col_stride + r].  Rows are processed in strips of
   16; sgemv() only dispatches here when rows is a multiple of 16. */
static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
   int row0, col, k;
   OPUS_CLEAR(out, rows);
   for (row0=0;row0<rows;row0+=16)
   {
      for (col=0;col<cols;col++)
      {
         const float * restrict w = &weights[col*col_stride + row0];
         float * restrict acc = &out[row0];
         const float xj = x[col];
         /* One column contributes to all 16 rows of the strip. */
         for (k=0;k<16;k++) acc[k] += w[k]*xj;
      }
   }
}
|
||||
|
||||
/* Same product as sgemv16x1() but with row strips of 8; sgemv() dispatches
   here when rows is a multiple of 8 but not of 16.  Element (row r, column j)
   is weights[j*col_stride + r]. */
static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
   int row0, col, k;
   OPUS_CLEAR(out, rows);
   for (row0=0;row0<rows;row0+=8)
   {
      for (col=0;col<cols;col++)
      {
         const float * restrict w = &weights[col*col_stride + row0];
         float * restrict acc = &out[row0];
         const float xj = x[col];
         for (k=0;k<8;k++) acc[k] += w[k]*xj;
      }
   }
}
|
||||
|
||||
/* Float matrix-vector product out = W*x with W stored column by column
   (element (r, c) at weights[c*col_stride + r]).  Picks the 16-row or 8-row
   strip kernel when rows allows it, otherwise uses a plain double loop. */
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
   int r, c;
   if ((rows&0xf) == 0) {
      sgemv16x1(out, weights, rows, cols, col_stride, x);
      return;
   }
   if ((rows&0x7) == 0) {
      sgemv8x1(out, weights, rows, cols, col_stride, x);
      return;
   }
   for (r=0;r<rows;r++)
   {
      out[r] = 0;
      for (c=0;c<cols;c++)
         out[r] += weights[c*col_stride + r]*x[c];
   }
}
|
||||
|
||||
/* Block-sparse float matrix-vector product.

   For every strip of 8 output rows, idx holds a block count followed by that
   many input positions.  Each block is 32 consecutive weights, laid out as
   4 columns of 8 rows (w[8*c + r]), and multiplies x[pos..pos+3]. */
static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
{
   int row0, blk, c, r;
   OPUS_CLEAR(out, rows);
   for (row0=0;row0<rows;row0+=8)
   {
      int nb_blocks = *idx++;
      for (blk=0;blk<nb_blocks;blk++)
      {
         float xin[4];
         float * restrict y = &out[row0];
         int pos = *idx++;
         /* Read the four inputs before touching the accumulators. */
         for (c=0;c<4;c++) xin[c] = x[pos+c];
         for (c=0;c<4;c++)
         {
            for (r=0;r<8;r++) y[r] += w[8*c + r]*xin[c];
         }
         w += 32;
      }
   }
}
|
||||
|
||||
#ifdef USE_SU_BIAS
|
||||
static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char x[MAX_INPUTS];
|
||||
for (i=0;i<rows;i++) out[i] = 0;
|
||||
for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
int colblocks;
|
||||
colblocks = *idx++;
|
||||
for (j=0;j<colblocks;j++)
|
||||
{
|
||||
int pos;
|
||||
float * restrict y;
|
||||
int xj0, xj1, xj2, xj3;
|
||||
pos = (*idx++);
|
||||
xj0 = x[pos+0];
|
||||
xj1 = x[pos+1];
|
||||
xj2 = x[pos+2];
|
||||
xj3 = x[pos+3];
|
||||
y = &out[i];
|
||||
y[0] += (w[0]*xj0+w[1]*xj1+w[2]*xj2+w[3]*xj3);
|
||||
y[1] += (w[4]*xj0+w[5]*xj1+w[6]*xj2+w[7]*xj3);
|
||||
y[2] += (w[8]*xj0+w[9]*xj1+w[10]*xj2+w[11]*xj3);
|
||||
y[3] += (w[12]*xj0+w[13]*xj1+w[14]*xj2+w[15]*xj3);
|
||||
y[4] += (w[16]*xj0+w[17]*xj1+w[18]*xj2+w[19]*xj3);
|
||||
y[5] += (w[20]*xj0+w[21]*xj1+w[22]*xj2+w[23]*xj3);
|
||||
y[6] += (w[24]*xj0+w[25]*xj1+w[26]*xj2+w[27]*xj3);
|
||||
y[7] += (w[28]*xj0+w[29]*xj1+w[30]*xj2+w[31]*xj3);
|
||||
w += 32;
|
||||
}
|
||||
}
|
||||
for (i=0;i<rows;i++) out[i] *= scale[i];
|
||||
}
|
||||
static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char x[MAX_INPUTS];
|
||||
for (i=0;i<rows;i++) out[i] = 0;
|
||||
for (i=0;i<cols;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
for (j=0;j<cols;j+=4)
|
||||
{
|
||||
float *y;
|
||||
float xj0, xj1, xj2, xj3;
|
||||
xj0 = x[j+0];
|
||||
xj1 = x[j+1];
|
||||
xj2 = x[j+2];
|
||||
xj3 = x[j+3];
|
||||
y = &out[i];
|
||||
y[0] += (w[0]*xj0+w[1]*xj1+w[2]*xj2+w[3]*xj3);
|
||||
y[1] += (w[4]*xj0+w[5]*xj1+w[6]*xj2+w[7]*xj3);
|
||||
y[2] += (w[8]*xj0+w[9]*xj1+w[10]*xj2+w[11]*xj3);
|
||||
y[3] += (w[12]*xj0+w[13]*xj1+w[14]*xj2+w[15]*xj3);
|
||||
y[4] += (w[16]*xj0+w[17]*xj1+w[18]*xj2+w[19]*xj3);
|
||||
y[5] += (w[20]*xj0+w[21]*xj1+w[22]*xj2+w[23]*xj3);
|
||||
y[6] += (w[24]*xj0+w[25]*xj1+w[26]*xj2+w[27]*xj3);
|
||||
y[7] += (w[28]*xj0+w[29]*xj1+w[30]*xj2+w[31]*xj3);
|
||||
w += 32;
|
||||
}
|
||||
}
|
||||
for (i=0;i<rows;i++) out[i] *= scale[i];
|
||||
}
|
||||
#else
|
||||
static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
opus_int8 x[MAX_INPUTS];
|
||||
for (i=0;i<rows;i++) out[i] = 0;
|
||||
for (i=0;i<cols;i++) x[i] = (int)floor(.5+127*_x[i]);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
int colblocks;
|
||||
colblocks = *idx++;
|
||||
for (j=0;j<colblocks;j++)
|
||||
{
|
||||
int pos;
|
||||
float * restrict y;
|
||||
int xj0, xj1, xj2, xj3;
|
||||
pos = (*idx++);
|
||||
xj0 = x[pos+0];
|
||||
xj1 = x[pos+1];
|
||||
xj2 = x[pos+2];
|
||||
xj3 = x[pos+3];
|
||||
y = &out[i];
|
||||
y[0] += (w[0]*xj0+w[1]*xj1+w[2]*xj2+w[3]*xj3);
|
||||
y[1] += (w[4]*xj0+w[5]*xj1+w[6]*xj2+w[7]*xj3);
|
||||
y[2] += (w[8]*xj0+w[9]*xj1+w[10]*xj2+w[11]*xj3);
|
||||
y[3] += (w[12]*xj0+w[13]*xj1+w[14]*xj2+w[15]*xj3);
|
||||
y[4] += (w[16]*xj0+w[17]*xj1+w[18]*xj2+w[19]*xj3);
|
||||
y[5] += (w[20]*xj0+w[21]*xj1+w[22]*xj2+w[23]*xj3);
|
||||
y[6] += (w[24]*xj0+w[25]*xj1+w[26]*xj2+w[27]*xj3);
|
||||
y[7] += (w[28]*xj0+w[29]*xj1+w[30]*xj2+w[31]*xj3);
|
||||
w += 32;
|
||||
}
|
||||
}
|
||||
for (i=0;i<rows;i++) out[i] *= scale[i];
|
||||
}
|
||||
static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
opus_int8 x[MAX_INPUTS];
|
||||
for (i=0;i<rows;i++) out[i] = 0;
|
||||
for (i=0;i<cols;i++) x[i] = (int)floor(.5+127*_x[i]);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
for (j=0;j<cols;j+=4)
|
||||
{
|
||||
float *y;
|
||||
float xj0, xj1, xj2, xj3;
|
||||
xj0 = x[j+0];
|
||||
xj1 = x[j+1];
|
||||
xj2 = x[j+2];
|
||||
xj3 = x[j+3];
|
||||
y = &out[i];
|
||||
y[0] += (w[0]*xj0+w[1]*xj1+w[2]*xj2+w[3]*xj3);
|
||||
y[1] += (w[4]*xj0+w[5]*xj1+w[6]*xj2+w[7]*xj3);
|
||||
y[2] += (w[8]*xj0+w[9]*xj1+w[10]*xj2+w[11]*xj3);
|
||||
y[3] += (w[12]*xj0+w[13]*xj1+w[14]*xj2+w[15]*xj3);
|
||||
y[4] += (w[16]*xj0+w[17]*xj1+w[18]*xj2+w[19]*xj3);
|
||||
y[5] += (w[20]*xj0+w[21]*xj1+w[22]*xj2+w[23]*xj3);
|
||||
y[6] += (w[24]*xj0+w[25]*xj1+w[26]*xj2+w[27]*xj3);
|
||||
y[7] += (w[28]*xj0+w[29]*xj1+w[30]*xj2+w[31]*xj3);
|
||||
w += 32;
|
||||
}
|
||||
}
|
||||
for (i=0;i<rows;i++) out[i] *= scale[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
/* No AVX2/FMA support */
|
||||
#ifndef LPCNET_TEST
|
||||
static inline float lpcnet_exp2(float x)
|
||||
{
|
||||
int integer;
|
||||
float frac;
|
||||
union {
|
||||
float f;
|
||||
opus_uint32 i;
|
||||
} res;
|
||||
integer = floor(x);
|
||||
if (integer < -50)
|
||||
return 0;
|
||||
frac = x-integer;
|
||||
/* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
|
||||
res.f = 0.99992522f + frac * (0.69583354f
|
||||
+ frac * (0.22606716f + 0.078024523f*frac));
|
||||
res.i = (res.i + (integer<<23)) & 0x7fffffff;
|
||||
return res.f;
|
||||
}
|
||||
#define lpcnet_exp(x) lpcnet_exp2((x)*1.44269504f)
|
||||
|
||||
#define fmadd(a, b, c) ((a)*(b)+(c))
|
||||
/* Scalar tanh() approximation: x*(N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4),
   clamped to [-1, 1].  Uses an exact division. */
static OPUS_INLINE float tanh_approx(float x)
{
   const float N0 = 952.52801514f;
   const float N1 = 96.39235687f;
   const float N2 = 0.60863042f;
   const float D0 = 952.72399902f;
   const float D1 = 413.36801147f;
   const float D2 = 11.88600922f;
   float sq, numer, denom, y;
   sq = x*x;
   /* Both polynomials in x^2 are evaluated in Horner form. */
   numer = fmadd(N2, sq, N1);
   numer = fmadd(numer, sq, N0);
   denom = fmadd(D2, sq, D1);
   denom = fmadd(denom, sq, D0);
   y = numer*x/denom;
   return MAX32(-1.f, MIN32(1.f, y));
}
|
||||
|
||||
/* Logistic sigmoid via the identity sigmoid(x) = 0.5 + 0.5*tanh(x/2),
   using tanh_approx(). */
static inline float sigmoid_approx(float x)
{
   const float t = tanh_approx(.5f*x);
   return .5f + .5f*t;
}
|
||||
|
||||
/* Writes lpcnet_exp(x[i]) to y[i] for the N entries.  Despite the name, no
   normalisation by the sum is performed here. */
static inline void softmax(float *y, const float *x, int N)
{
   int k = 0;
   while (k < N)
   {
      y[k] = lpcnet_exp(x[k]);
      k++;
   }
}
|
||||
|
||||
/* Element-wise tanh_approx() over N floats: y[i] = tanh_approx(x[i]). */
static inline void vec_tanh(float *y, const float *x, int N)
{
   int k = 0;
   while (k < N)
   {
      y[k] = tanh_approx(x[k]);
      k++;
   }
}
|
||||
|
||||
/* Element-wise sigmoid_approx() over N floats: y[i] = sigmoid_approx(x[i]). */
static inline void vec_sigmoid(float *y, const float *x, int N)
{
   int k = 0;
   while (k < N)
   {
      y[k] = sigmoid_approx(x[k]);
      k++;
   }
}
|
||||
#endif
|
||||
|
||||
#define SCALE (128.f*127.f)
|
||||
#define SCALE_1 (1.f/128.f/127.f)
|
||||
|
||||
#endif /*no optimizations*/
|
||||
#endif /*VEC_H*/
|
||||
|
|
@ -0,0 +1,884 @@
|
|||
/* Copyright (c) 2018 Mozilla
|
||||
2012-2017 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
AVX implementation of vector operations, compile with -mavx
|
||||
AVX2/FMA implementation of vector operations, compile with -mavx2 -mfma
|
||||
*/
|
||||
|
||||
#ifndef VEC_AVX_H
|
||||
#define VEC_AVX_H
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <math.h>
|
||||
#include "celt/x86/x86cpu.h"
|
||||
|
||||
#define MAX_INPUTS (2048)
|
||||
|
||||
#define USE_SU_BIAS
|
||||
|
||||
/* _mm_floor_ps() is an SSE4.1 intrinsic.  When SSE4.1 is not enabled,
   substitute an SSE2 approximation: subtract 0.5, convert to integer with the
   current rounding mode and convert back.
   The guard tests __SSE4_1__, the macro compilers such as GCC and Clang
   define for -msse4.1; the previous spelling __SSE_4_1__ is never defined, so
   the emulation used to replace the native intrinsic unconditionally. */
#ifndef __SSE4_1__
static inline __m128 mm_floor_ps(__m128 x) {
  __m128 half = _mm_set1_ps(0.5);
  return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_sub_ps(x, half)));
}
#undef _mm_floor_ps
#define _mm_floor_ps(x) mm_floor_ps(x)
#endif
|
||||
|
||||
|
||||
/* If we don't have AVX available, emulate what we need with SSE up to 4.1. */
|
||||
#ifndef __AVX__
|
||||
|
||||
/* Stand-in for a 256-bit float vector when AVX is unavailable: two 128-bit
   SSE halves.  lo carries elements 0-3 and hi elements 4-7, as laid out by
   mm256_loadu_ps(). */
typedef struct {
  __m128 lo;
  __m128 hi;
} mm256_emu;
#define __m256 mm256_emu
|
||||
|
||||
static inline mm256_emu mm256_loadu_ps(const float *src) {
|
||||
mm256_emu ret;
|
||||
ret.lo = _mm_loadu_ps(&src[0]);
|
||||
ret.hi = _mm_loadu_ps(&src[4]);
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_loadu_ps(src) mm256_loadu_ps(src)
|
||||
|
||||
|
||||
/* Emulated unaligned store of 8 floats as two 4-float SSE stores. */
static inline void mm256_storeu_ps(float *dst, mm256_emu src) {
  _mm_storeu_ps(dst, src.lo);
  _mm_storeu_ps(dst + 4, src.hi);
}
#define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
|
||||
|
||||
|
||||
static inline mm256_emu mm256_setzero_ps(void) {
|
||||
mm256_emu ret;
|
||||
ret.lo = _mm_setzero_ps();
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_setzero_ps mm256_setzero_ps
|
||||
|
||||
static inline mm256_emu mm256_broadcast_ss(const float *x) {
|
||||
mm256_emu ret;
|
||||
ret.lo = _mm_set1_ps(*x);
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_broadcast_ss(x) mm256_broadcast_ss(x)
|
||||
|
||||
static inline mm256_emu mm256_set1_ps(float x) {
|
||||
mm256_emu ret;
|
||||
ret.lo = _mm_set1_ps(x);
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_set1_ps(x) mm256_set1_ps(x)
|
||||
|
||||
|
||||
|
||||
/* Emulated lane-wise product of two 8-float vectors. */
static inline mm256_emu mm256_mul_ps(mm256_emu a, mm256_emu b) {
  mm256_emu prod;
  prod.lo = _mm_mul_ps(a.lo, b.lo);
  prod.hi = _mm_mul_ps(a.hi, b.hi);
  return prod;
}
#define _mm256_mul_ps(a,b) mm256_mul_ps(a,b)
|
||||
|
||||
/* Emulated lane-wise sum of two 8-float vectors. */
static inline mm256_emu mm256_add_ps(mm256_emu a, mm256_emu b) {
  mm256_emu total;
  total.lo = _mm_add_ps(a.lo, b.lo);
  total.hi = _mm_add_ps(a.hi, b.hi);
  return total;
}
#define _mm256_add_ps(a,b) mm256_add_ps(a,b)
|
||||
|
||||
|
||||
/* Emulated lane-wise maximum of two 8-float vectors. */
static inline mm256_emu mm256_max_ps(mm256_emu a, mm256_emu b) {
  mm256_emu top;
  top.lo = _mm_max_ps(a.lo, b.lo);
  top.hi = _mm_max_ps(a.hi, b.hi);
  return top;
}
#define _mm256_max_ps(a,b) mm256_max_ps(a,b)
|
||||
|
||||
/* Emulated lane-wise minimum of two 8-float vectors. */
static inline mm256_emu mm256_min_ps(mm256_emu a, mm256_emu b) {
  mm256_emu bottom;
  bottom.lo = _mm_min_ps(a.lo, b.lo);
  bottom.hi = _mm_min_ps(a.hi, b.hi);
  return bottom;
}
#define _mm256_min_ps(a,b) mm256_min_ps(a,b)
|
||||
|
||||
/* Emulated lane-wise approximate reciprocal, applying _mm_rcp_ps() to each half. */
static inline mm256_emu mm256_rcp_ps(mm256_emu a) {
  mm256_emu inv;
  inv.lo = _mm_rcp_ps(a.lo);
  inv.hi = _mm_rcp_ps(a.hi);
  return inv;
}
#define _mm256_rcp_ps(a) mm256_rcp_ps(a)
|
||||
|
||||
|
||||
/* Emulated half extraction: i == 0 selects the low four lanes, any other
   value the high four. */
static inline __m128 mm256_extractf128_ps(mm256_emu x, int i) {
  if (i == 0) return x.lo;
  return x.hi;
}
#undef _mm256_extractf128_ps
#define _mm256_extractf128_ps(x,i) mm256_extractf128_ps(x,i)
|
||||
|
||||
/* Emulated half insertion: returns dst with its low half (i == 0) or high
   half (any other i) replaced by src. */
static inline mm256_emu mm256_insertf128_ps(mm256_emu dst, __m128 src, int i) {
  if (i != 0) dst.hi = src;
  else dst.lo = src;
  return dst;
}
#undef _mm256_insertf128_ps
#define _mm256_insertf128_ps(dst,src,i) mm256_insertf128_ps(dst,src,i)
|
||||
|
||||
#endif /* __AVX__ */
|
||||
|
||||
|
||||
|
||||
/* If we don't have AVX2 available, emulate what we need with SSE up to 4.1. */
|
||||
#ifndef __AVX2__
|
||||
|
||||
/* Stand-in for a 256-bit integer vector when AVX2 is unavailable: two 128-bit
   halves.  The compiler's own type stays reachable as real_m256i because the
   name __m256i is redirected to the emulation right after. */
typedef struct {
  __m128i lo;
  __m128i hi;
} mm256i_emu;
typedef __m256i real_m256i;
#define __m256i mm256i_emu
|
||||
|
||||
static inline mm256i_emu mm256_setzero_si256(void) {
|
||||
mm256i_emu ret;
|
||||
ret.lo = _mm_setzero_si128();
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_setzero_si256 mm256_setzero_si256
|
||||
|
||||
|
||||
static inline mm256i_emu mm256_loadu_si256(const mm256i_emu *src) {
|
||||
mm256i_emu ret;
|
||||
ret.lo = _mm_loadu_si128((const __m128i*)src);
|
||||
ret.hi = _mm_loadu_si128(&((const __m128i*)src)[1]);
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_loadu_si256(src) mm256_loadu_si256(src)
|
||||
|
||||
|
||||
/* Emulated unaligned 32-byte integer store as two 16-byte stores. */
static inline void mm256_storeu_si256(mm256i_emu *dst, mm256i_emu src) {
  __m128i *halves = (__m128i*)dst;
  _mm_storeu_si128(&halves[0], src.lo);
  _mm_storeu_si128(&halves[1], src.hi);
}
#define _mm256_storeu_si256(dst, src) mm256_storeu_si256(dst, src)
|
||||
|
||||
|
||||
static inline mm256i_emu mm256_broadcastd_epi32(__m128i x) {
|
||||
mm256i_emu ret;
|
||||
ret.hi = ret.lo = _mm_shuffle_epi32(x, 0);
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_broadcastd_epi32(x) mm256_broadcastd_epi32(x)
|
||||
|
||||
|
||||
static inline mm256i_emu mm256_set1_epi32(int x) {
|
||||
mm256i_emu ret;
|
||||
ret.lo = _mm_set1_epi32(x);
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_set1_epi32(x) mm256_set1_epi32(x)
|
||||
|
||||
static inline mm256i_emu mm256_set1_epi16(int x) {
|
||||
mm256i_emu ret;
|
||||
ret.lo = _mm_set1_epi16(x);
|
||||
ret.hi = ret.lo;
|
||||
return ret;
|
||||
}
|
||||
#define _mm256_set1_epi16(x) mm256_set1_epi16(x)
|
||||
|
||||
|
||||
/* Emulated lane-wise 32-bit integer addition. */
static inline mm256i_emu mm256_add_epi32(mm256i_emu a, mm256i_emu b) {
  mm256i_emu total;
  total.lo = _mm_add_epi32(a.lo, b.lo);
  total.hi = _mm_add_epi32(a.hi, b.hi);
  return total;
}
#define _mm256_add_epi32(a,b) mm256_add_epi32(a,b)
|
||||
|
||||
/* Emulated 16-bit multiply-add, applying _mm_madd_epi16() to each half. */
static inline mm256i_emu mm256_madd_epi16(mm256i_emu a, mm256i_emu b) {
  mm256i_emu acc;
  acc.lo = _mm_madd_epi16(a.lo, b.lo);
  acc.hi = _mm_madd_epi16(a.hi, b.hi);
  return acc;
}
#define _mm256_madd_epi16(a,b) mm256_madd_epi16(a,b)
|
||||
|
||||
/* Emulated 8-bit multiply-add, applying _mm_maddubs_epi16() to each half. */
static inline mm256i_emu mm256_maddubs_epi16(mm256i_emu a, mm256i_emu b) {
  mm256i_emu acc;
  acc.lo = _mm_maddubs_epi16(a.lo, b.lo);
  acc.hi = _mm_maddubs_epi16(a.hi, b.hi);
  return acc;
}
#define _mm256_maddubs_epi16(a,b) mm256_maddubs_epi16(a,b)
|
||||
|
||||
|
||||
|
||||
/* Emulating the conversion functions is tricky because they use __m256i but are defined in AVX.
|
||||
   So we need to make a special case when only AVX is available. */
|
||||
#ifdef __AVX__
|
||||
|
||||
typedef union {
|
||||
mm256i_emu fake;
|
||||
real_m256i real;
|
||||
} mm256_union;
|
||||
|
||||
/* AVX-without-AVX2 case: convert 8 int32 lanes to float with the native
   intrinsic, passing the emulated integer vector through mm256_union.  The
   macro alias is defined after the function so that the call inside still
   reaches the real intrinsic. */
static inline __m256 mm256_cvtepi32_ps(mm256i_emu a) {
  mm256_union bits;
  bits.fake = a;
  return _mm256_cvtepi32_ps(bits.real);
}
#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
|
||||
|
||||
static inline mm256i_emu mm256_cvtps_epi32(__m256 a) {
|
||||
mm256_union ret;
|
||||
ret.real = _mm256_cvtps_epi32(a);
|
||||
return ret.fake;
|
||||
}
|
||||
#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* SSE-only case: convert 8 int32 lanes to float, one half at a time. */
static inline mm256_emu mm256_cvtepi32_ps(mm256i_emu a) {
  mm256_emu v;
  v.lo = _mm_cvtepi32_ps(a.lo);
  v.hi = _mm_cvtepi32_ps(a.hi);
  return v;
}
#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
|
||||
|
||||
/* SSE-only case: convert 8 floats to int32, one half at a time. */
static inline mm256i_emu mm256_cvtps_epi32(mm256_emu a) {
  mm256i_emu v;
  v.lo = _mm_cvtps_epi32(a.lo);
  v.hi = _mm_cvtps_epi32(a.hi);
  return v;
}
#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
|
||||
|
||||
#endif /* __AVX__ */
|
||||
|
||||
|
||||
#endif /* __AVX2__ */
|
||||
|
||||
/* In case we don't have FMA, make it a mul and an add. */
|
||||
#if !(defined(__FMA__) && defined(__AVX__))
|
||||
#define _mm256_fmadd_ps(a,b,c) _mm256_add_ps(_mm256_mul_ps(a, b), c)
|
||||
#define _mm_fmadd_ps(a,b,c) _mm_add_ps(_mm_mul_ps(a, b), c)
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
/* exp() approximation on 8 floats (AVX2 path).
   X is scaled by log2(e), clamped to [-50, 50] and split into floor and
   fraction.  A cubic polynomial approximates 2^fraction and the integer part
   is added to the exponent field of the result. */
static inline __m256 exp8_approx(__m256 X)
{
   const __m256 K0 = _mm256_set1_ps(0.99992522f);
   const __m256 K1 = _mm256_set1_ps(0.69583354f);
   const __m256 K2 = _mm256_set1_ps(0.22606716f);
   const __m256 K3 = _mm256_set1_ps(0.078024523f);
   const __m256 log2_E = _mm256_set1_ps(1.44269504f);
   const __m256 max_in = _mm256_set1_ps(50.f);
   const __m256 min_in = _mm256_set1_ps(-50.f);
   __m256 whole, frac, poly;
   __m256i expo;
   X = _mm256_mul_ps(X, log2_E);
   X = _mm256_max_ps(min_in, _mm256_min_ps(max_in, X));
   whole = _mm256_floor_ps(X);
   frac = _mm256_sub_ps(X, whole);
   /* Horner evaluation of K0 + K1*f + K2*f^2 + K3*f^3. */
   poly = _mm256_fmadd_ps(K3, frac, K2);
   poly = _mm256_fmadd_ps(poly, frac, K1);
   poly = _mm256_fmadd_ps(poly, frac, K0);
   /* Move the integer part to the position of the float exponent field. */
   expo = _mm256_slli_epi32(_mm256_cvtps_epi32(whole), 23);
   return _mm256_castsi256_ps(_mm256_add_epi32(expo, _mm256_castps_si256(poly)));
}
|
||||
|
||||
/* Quantize len floats to unsigned bytes (AVX2 path): x[i] is 127*_x[i] + 127
   converted to integer with the current rounding mode and saturated to
   [0, 255] by the pack instructions.

   Eight inputs are handled per iteration.
   NOTE(review): when len is not a multiple of 8 the last iteration still
   reads 8 floats and writes 8 bytes, so both buffers must be padded
   accordingly - confirm callers.

   Only the 8 bytes produced by an iteration are stored.  A full 256-bit
   store would write 32 bytes per iteration, i.e. up to 24 bytes past
   x[len-1] on the last one. */
static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
    int i;
    __m256 const127 = _mm256_set1_ps(127.f);
    for (i=0;i<len;i+=8) {
       __m256 xf;
       __m256i xi;
       xf = _mm256_loadu_ps(&_x[i]);
       xf = _mm256_fmadd_ps(xf, const127, const127);
       xi = _mm256_cvtps_epi32(xf);
       /* 32 -> 16 bit: each 128-bit lane ends up with 4 values and 4 zeros. */
       xi = _mm256_packus_epi32(xi,  _mm256_setzero_si256());
       /* Gather the two groups of four values into the low 128-bit lane. */
       xi = _mm256_permute4x64_epi64(xi, 0xD8);
       /* 16 -> 8 bit: the low 64 bits now hold the 8 result bytes in order. */
       xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
       _mm_storel_epi64((__m128i *)(void*)&x[i], _mm256_castsi256_si128(xi));
    }
}
|
||||
|
||||
#else
|
||||
/* exp() approximation on 4 floats (SSE path).
   X is scaled by log2(e), clamped to [-50, 50] and split into floor and
   fraction.  A cubic polynomial approximates 2^fraction and the integer part
   is added to the exponent field; the sign bit of the result is masked off. */
static inline __m128 exp4_approx(__m128 X)
{
   const __m128 K0 = _mm_set1_ps(0.99992522f);
   const __m128 K1 = _mm_set1_ps(0.69583354f);
   const __m128 K2 = _mm_set1_ps(0.22606716f);
   const __m128 K3 = _mm_set1_ps(0.078024523f);
   const __m128 log2_E = _mm_set1_ps(1.44269504);
   const __m128 max_in = _mm_set1_ps(50.f);
   const __m128 min_in = _mm_set1_ps(-50.f);
   const __m128i mask = _mm_set1_epi32(0x7fffffff);
   __m128 whole, frac, poly;
   __m128i expo, bits;
   X = _mm_mul_ps(X, log2_E);
   X = _mm_max_ps(min_in, _mm_min_ps(max_in, X));
   whole = _mm_floor_ps(X);
   frac = _mm_sub_ps(X, whole);
   /* Horner evaluation of K0 + K1*f + K2*f^2 + K3*f^3. */
   poly = _mm_fmadd_ps(K3, frac, K2);
   poly = _mm_fmadd_ps(poly, frac, K1);
   poly = _mm_fmadd_ps(poly, frac, K0);
   /* Move the integer part to the position of the float exponent field. */
   expo = _mm_slli_epi32(_mm_cvtps_epi32(whole), 23);
   bits = _mm_add_epi32(expo, _mm_castps_si128(poly));
   return _mm_castsi128_ps(_mm_and_si128(mask, bits));
}
|
||||
static inline __m256 exp8_approx(__m256 X)
|
||||
{
|
||||
__m256 Y;
|
||||
__m128 Xhi, Xlo, Yhi, Ylo;
|
||||
Xhi = _mm256_extractf128_ps(X, 1);
|
||||
Xlo = _mm256_extractf128_ps(X, 0);
|
||||
Yhi = exp4_approx(Xhi);
|
||||
Ylo = exp4_approx(Xlo);
|
||||
Y = _mm256_insertf128_ps(_mm256_setzero_ps(), Yhi, 1);
|
||||
Y = _mm256_insertf128_ps(Y, Ylo, 0);
|
||||
return Y;
|
||||
}
|
||||
|
||||
static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
|
||||
int i;
|
||||
for (i=0;i<len;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
/* tanh() on 8 floats, approximated by a Padé-like rational function:
       tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [-1, 1].
   The coefficients were determined by gradient descent trying to minimize
   the maximum deviation over the whole range (this is only possible because
   of the bounds). The max error is around 3e-4 and is dominated by the
   reciprocal approximation (the max error of the rational function is
   around 6e-5).
*/
static inline __m256 tanh8_approx(__m256 X)
{
   const __m256 N0 = _mm256_set1_ps(952.52801514f);
   const __m256 N1 = _mm256_set1_ps(96.39235687f);
   const __m256 N2 = _mm256_set1_ps(0.60863042f);
   const __m256 D0 = _mm256_set1_ps(952.72399902f);
   const __m256 D1 = _mm256_set1_ps(413.36801147f);
   const __m256 D2 = _mm256_set1_ps(11.88600922f);
   const __m256 max_out = _mm256_set1_ps(1.f);
   const __m256 min_out = _mm256_set1_ps(-1.f);
   __m256 sq, numer, denom, y;
   sq = _mm256_mul_ps(X, X);
   /* Horner evaluation of both polynomials in x^2. */
   numer = _mm256_fmadd_ps(N2, sq, N1);
   numer = _mm256_fmadd_ps(numer, sq, N0);
   denom = _mm256_fmadd_ps(D2, sq, D1);
   denom = _mm256_fmadd_ps(denom, sq, D0);
   numer = _mm256_mul_ps(numer, X);
   /* Multiply by the approximate reciprocal instead of dividing. */
   y = _mm256_mul_ps(numer, _mm256_rcp_ps(denom));
   return _mm256_max_ps(min_out, _mm256_min_ps(max_out, y));
}
|
||||
|
||||
/* Sigmoid on 8 floats, approximated by a Padé-like rational function:
       1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [0, 1].
   The coefficients are directly derived by dividing the tanh() coefficients
   by powers of two to get the correct scaling. The max error is around 1.5e-4
   and is dominated by the reciprocal approximation (the max error of the
   rational function is around 3e-5).
*/
static inline __m256 sigmoid8_approx(__m256 X)
{
   const __m256 N0 = _mm256_set1_ps(238.13200378f);
   const __m256 N1 = _mm256_set1_ps(6.02452230f);
   const __m256 N2 = _mm256_set1_ps(0.00950985f);
   const __m256 D0 = _mm256_set1_ps(952.72399902f);
   const __m256 D1 = _mm256_set1_ps(103.34200287f);
   const __m256 D2 = _mm256_set1_ps(0.74287558f);
   const __m256 half = _mm256_set1_ps(0.5);
   const __m256 max_out = _mm256_set1_ps(1.f);
   const __m256 min_out = _mm256_set1_ps(0.f);
   __m256 sq, numer, denom, y;
   sq = _mm256_mul_ps(X, X);
   /* Horner evaluation of both polynomials in x^2. */
   numer = _mm256_fmadd_ps(N2, sq, N1);
   numer = _mm256_fmadd_ps(numer, sq, N0);
   denom = _mm256_fmadd_ps(D2, sq, D1);
   denom = _mm256_fmadd_ps(denom, sq, D0);
   numer = _mm256_mul_ps(numer, X);
   /* Approximate reciprocal, then the +0.5 offset in the same fmadd. */
   y = _mm256_fmadd_ps(numer, _mm256_rcp_ps(denom), half);
   return _mm256_max_ps(min_out, _mm256_min_ps(max_out, y));
}
|
||||
|
||||
static inline float tanh_approx(float x)
|
||||
{
|
||||
float out[8];
|
||||
__m256 X, Y;
|
||||
X = _mm256_set1_ps(x);
|
||||
Y = tanh8_approx(X);
|
||||
_mm256_storeu_ps(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline float sigmoid_approx(float x)
|
||||
{
|
||||
float out[8];
|
||||
__m256 X, Y;
|
||||
X = _mm256_set1_ps(x);
|
||||
Y = sigmoid8_approx(X);
|
||||
_mm256_storeu_ps(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* 4-wide tanh approximation using a rational function:
   tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [-1, 1]. The division is replaced by an
   approximate reciprocal. */
static inline __m128 tanh4_approx(__m128 X)
{
   const __m128 num0 = _mm_set1_ps(952.52801514f);
   const __m128 num1 = _mm_set1_ps(96.39235687f);
   const __m128 num2 = _mm_set1_ps(0.60863042f);
   const __m128 den0 = _mm_set1_ps(952.72399902f);
   const __m128 den1 = _mm_set1_ps(413.36801147f);
   const __m128 den2 = _mm_set1_ps(11.88600922f);
   const __m128 upper = _mm_set1_ps(1.f);
   const __m128 lower = _mm_set1_ps(-1.f);
   __m128 sq, p, q, y;

   sq = _mm_mul_ps(X, X);
   /* Horner evaluation of numerator and denominator in x^2. */
   p = _mm_fmadd_ps(_mm_fmadd_ps(num2, sq, num1), sq, num0);
   q = _mm_fmadd_ps(_mm_fmadd_ps(den2, sq, den1), sq, den0);
   p = _mm_mul_ps(p, X);
   q = _mm_rcp_ps(q);
   y = _mm_mul_ps(p, q);
   return _mm_max_ps(lower, _mm_min_ps(upper, y));
}
|
||||
|
||||
/* 4-wide sigmoid approximation using a rational function:
   1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [0, 1]. The division is replaced by an
   approximate reciprocal. */
static inline __m128 sigmoid4_approx(__m128 X)
{
   const __m128 num0 = _mm_set1_ps(238.13200378f);
   const __m128 num1 = _mm_set1_ps(6.02452230f);
   const __m128 num2 = _mm_set1_ps(0.00950985f);
   const __m128 den0 = _mm_set1_ps(952.72399902f);
   const __m128 den1 = _mm_set1_ps(103.34200287f);
   const __m128 den2 = _mm_set1_ps(0.74287558f);
   const __m128 offset = _mm_set1_ps(0.5);
   const __m128 upper = _mm_set1_ps(1.f);
   const __m128 lower = _mm_set1_ps(0.f);
   __m128 sq, p, q, y;

   sq = _mm_mul_ps(X, X);
   /* Horner evaluation of numerator and denominator in x^2. */
   p = _mm_fmadd_ps(_mm_fmadd_ps(num2, sq, num1), sq, num0);
   q = _mm_fmadd_ps(_mm_fmadd_ps(den2, sq, den1), sq, den0);
   p = _mm_mul_ps(p, X);
   q = _mm_rcp_ps(q);
   y = _mm_fmadd_ps(p, q, offset);
   return _mm_max_ps(lower, _mm_min_ps(upper, y));
}
|
||||
|
||||
static inline float tanh_approx(float x)
|
||||
{
|
||||
float out[4];
|
||||
__m128 X, Y;
|
||||
X = _mm_set1_ps(x);
|
||||
Y = tanh4_approx(X);
|
||||
_mm_storeu_ps(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline float sigmoid_approx(float x)
|
||||
{
|
||||
float out[4];
|
||||
__m128 X, Y;
|
||||
X = _mm_set1_ps(x);
|
||||
Y = sigmoid4_approx(X);
|
||||
_mm_storeu_ps(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline float lpcnet_exp(float x)
|
||||
{
|
||||
float out[8];
|
||||
__m256 X, Y;
|
||||
X = _mm256_set1_ps(x);
|
||||
Y = exp8_approx(X);
|
||||
_mm256_storeu_ps(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline void softmax(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-7;i+=8)
|
||||
{
|
||||
__m256 X, Y;
|
||||
X = _mm256_loadu_ps(&x[i]);
|
||||
Y = exp8_approx(X);
|
||||
_mm256_storeu_ps(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
y[i] = lpcnet_exp(x[i]);
|
||||
}
|
||||
|
||||
#ifdef __AVX__
|
||||
static inline void vec_tanh(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-7;i+=8)
|
||||
{
|
||||
__m256 X, Y;
|
||||
X = _mm256_loadu_ps(&x[i]);
|
||||
Y = tanh8_approx(X);
|
||||
_mm256_storeu_ps(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
y[i] = tanh_approx(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vec_sigmoid(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-7;i+=8)
|
||||
{
|
||||
__m256 X, Y;
|
||||
X = _mm256_loadu_ps(&x[i]);
|
||||
Y = sigmoid8_approx(X);
|
||||
_mm256_storeu_ps(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
y[i] = sigmoid_approx(x[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void vec_tanh(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
__m128 X, Y;
|
||||
X = _mm_loadu_ps(&x[i]);
|
||||
Y = tanh4_approx(X);
|
||||
_mm_storeu_ps(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
y[i] = tanh_approx(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vec_sigmoid(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
__m128 X, Y;
|
||||
X = _mm_loadu_ps(&x[i]);
|
||||
Y = sigmoid4_approx(X);
|
||||
_mm_storeu_ps(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
y[i] = sigmoid_approx(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
|
||||
|
||||
#define opus_mm256_dpbusds_epi32(src, a, b) _mm256_dpbusds_epi32(src, a, b)
|
||||
|
||||
#elif defined(__AVX2__)
|
||||
|
||||
/* AVX2 stand-in for the VNNI dot-product instruction: multiplies the bytes of
   a and b with _mm256_maddubs_epi16, widens the pair sums to 32 bits with
   _mm256_madd_epi16 against a vector of ones, and adds the result to src
   with a plain 32-bit add. */
static inline __m256i opus_mm256_dpbusds_epi32(__m256i src, __m256i a, __m256i b) {
   __m256i pair_sums, quad_sums;
   pair_sums = _mm256_maddubs_epi16(a, b);
   quad_sums = _mm256_madd_epi16(pair_sums, _mm256_set1_epi16(1));
   return _mm256_add_epi32(src, quad_sums);
}
|
||||
|
||||
#elif defined(__SSSE3__)
|
||||
|
||||
/* SSSE3 variant operating on the emulated 256-bit integer type: same
   maddubs / madd-by-ones / add sequence as the AVX2 version, expressed
   through the emulated _mm256_* helpers. */
static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
   mm256i_emu pair_sums, quad_sums;
   pair_sums = _mm256_maddubs_epi16(a, b);
   quad_sums = _mm256_madd_epi16(pair_sums, _mm256_set1_epi16(1));
   return _mm256_add_epi32(src, quad_sums);
}
|
||||
|
||||
#elif defined(__SSE2__)
|
||||
|
||||
/* SSE2-only byte dot product on 128-bit vectors. The bytes of a are
   zero-extended (logical shifts) and the bytes of b are sign-extended
   (arithmetic shifts) into 16-bit lanes, split into odd ("hi") and even
   ("lo") bytes. _mm_madd_epi16 then produces 32-bit sums of the products,
   which are added to src. */
static inline __m128i mm_dpbusds_epi32(__m128i src, __m128i a, __m128i b) {
   __m128i a_odd, a_even, b_odd, b_even, dot;
   /* Odd bytes: already in the upper half of each 16-bit lane. */
   a_odd = _mm_srli_epi16(a, 8);
   b_odd = _mm_srai_epi16(b, 8);
   /* Even bytes: move up first so the right shift extends them correctly. */
   a_even = _mm_srli_epi16(_mm_slli_epi16(a, 8), 8);
   b_even = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8);
   dot = _mm_add_epi32(_mm_madd_epi16(a_odd, b_odd), _mm_madd_epi16(a_even, b_even));
   return _mm_add_epi32(src, dot);
}
|
||||
|
||||
/* SSE2 variant for the emulated 256-bit integer type: applies
   mm_dpbusds_epi32() separately to the high and low 128-bit halves. */
static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
   mm256i_emu out;
   out.hi = mm_dpbusds_epi32(src.hi, a.hi, b.hi);
   out.lo = mm_dpbusds_epi32(src.lo, a.lo, b.lo);
   return out;
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#error "No optimizations in vec_avx.h. This should never happen. "
|
||||
#endif
|
||||
|
||||
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
|
||||
{
|
||||
int i, j;
|
||||
i=0;
|
||||
for (;i<rows-15;i+=16)
|
||||
{
|
||||
float *y;
|
||||
__m256 vy0, vy8;
|
||||
y = &out[i];
|
||||
vy0 = _mm256_setzero_ps();
|
||||
vy8 = _mm256_setzero_ps();
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
__m256 vxj;
|
||||
__m256 vw;
|
||||
vxj = _mm256_broadcast_ss(&x[j]);
|
||||
|
||||
vw = _mm256_loadu_ps(&weights[j*col_stride + i]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
|
||||
vw = _mm256_loadu_ps(&weights[j*col_stride + i + 8]);
|
||||
vy8 = _mm256_fmadd_ps(vw, vxj, vy8);
|
||||
}
|
||||
_mm256_storeu_ps (&y[0], vy0);
|
||||
_mm256_storeu_ps (&y[8], vy8);
|
||||
}
|
||||
for (;i<rows-7;i+=8)
|
||||
{
|
||||
float *y;
|
||||
__m256 vy0;
|
||||
y = &out[i];
|
||||
vy0 = _mm256_setzero_ps();
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
__m256 vxj;
|
||||
__m256 vw;
|
||||
vxj = _mm256_broadcast_ss(&x[j]);
|
||||
|
||||
vw = _mm256_loadu_ps(&weights[j*col_stride + i]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
}
|
||||
_mm256_storeu_ps (&y[0], vy0);
|
||||
}
|
||||
for (;i<rows-3;i+=4)
|
||||
{
|
||||
float *y;
|
||||
__m128 vy0;
|
||||
y = &out[i];
|
||||
vy0 = _mm_setzero_ps();
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
__m128 vxj;
|
||||
__m128 vw;
|
||||
vxj = _mm_set1_ps(x[j]);
|
||||
|
||||
vw = _mm_loadu_ps(&weights[j*col_stride + i]);
|
||||
vy0 = _mm_fmadd_ps(vw, vxj, vy0);
|
||||
}
|
||||
_mm_storeu_ps (&y[0], vy0);
|
||||
}
|
||||
for (;i<rows;i++)
|
||||
{
|
||||
out[i] = 0;
|
||||
for (j=0;j<cols;j++) out[i] += weights[j*col_stride + i]*x[j];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sparse_sgemv8x4(float *out, const float *weights, const int *idx, int rows, const float *x)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
float *y;
|
||||
int cols;
|
||||
__m256 vy0;
|
||||
y = &out[i];
|
||||
vy0 = _mm256_setzero_ps();
|
||||
cols = *idx++;
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
int id;
|
||||
__m256 vxj;
|
||||
__m256 vw;
|
||||
id = *idx++;
|
||||
vxj = _mm256_broadcast_ss(&x[id]);
|
||||
vw = _mm256_loadu_ps(&weights[0]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
|
||||
vxj = _mm256_broadcast_ss(&x[id+1]);
|
||||
vw = _mm256_loadu_ps(&weights[8]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
|
||||
vxj = _mm256_broadcast_ss(&x[id+2]);
|
||||
vw = _mm256_loadu_ps(&weights[16]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
|
||||
vxj = _mm256_broadcast_ss(&x[id+3]);
|
||||
vw = _mm256_loadu_ps(&weights[24]);
|
||||
vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
|
||||
|
||||
weights += 32;
|
||||
}
|
||||
_mm256_storeu_ps (&y[0], vy0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char x[MAX_INPUTS];
|
||||
/*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
|
||||
vector_ps_to_epi8(x, _x, cols);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
int colblocks;
|
||||
__m256i vy0;
|
||||
__m256 vout;
|
||||
colblocks = *idx++;
|
||||
vy0 = _mm256_setzero_si256();
|
||||
j=0;
|
||||
#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
|
||||
for (;j<colblocks-3;j+=4)
|
||||
{
|
||||
__m256i vxj;
|
||||
__m256i vw;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
}
|
||||
#endif
|
||||
for (;j<colblocks;j++)
|
||||
{
|
||||
__m256i vxj;
|
||||
__m256i vw;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
}
|
||||
vout = _mm256_cvtepi32_ps(vy0);
|
||||
vout = _mm256_mul_ps(vout, _mm256_loadu_ps(&scale[i]));
|
||||
_mm256_storeu_ps(&_out[i], vout);
|
||||
}
|
||||
}
|
||||
static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char x[MAX_INPUTS];
|
||||
/*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
|
||||
vector_ps_to_epi8(x, _x, cols);
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
__m256i vy0;
|
||||
__m256 vout;
|
||||
vy0 = _mm256_setzero_si256();
|
||||
j=0;
|
||||
#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
|
||||
for (;j<cols-12;j+=16)
|
||||
{
|
||||
__m256i vxj;
|
||||
__m256i vw;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+4]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+8]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+12]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
}
|
||||
#endif
|
||||
for (;j<cols;j+=4)
|
||||
{
|
||||
__m256i vxj;
|
||||
__m256i vw;
|
||||
vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j]));
|
||||
vw = _mm256_loadu_si256((const __m256i *)(void*)w);
|
||||
vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
|
||||
w += 32;
|
||||
}
|
||||
vout = _mm256_cvtepi32_ps(vy0);
|
||||
vout = _mm256_mul_ps(vout, _mm256_loadu_ps(&scale[i]));
|
||||
_mm256_storeu_ps(&_out[i], vout);
|
||||
}
|
||||
}
|
||||
|
||||
#define SCALE (128.f*127.f)
|
||||
#define SCALE_1 (1.f/128.f/127.f)
|
||||
#define USE_SU_BIAS
|
||||
|
||||
|
||||
#endif /*VEC_AVX_H*/
|
||||
|
|
@ -0,0 +1,473 @@
|
|||
/* Copyright (c) 2018 David Rowe
|
||||
2018 Mozilla
|
||||
2008-2011 Octasic Inc.
|
||||
2012-2017 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/* NEON support for ARM machines */
|
||||
|
||||
#ifndef VEC_NEON_H
|
||||
#define VEC_NEON_H
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "os_support.h"
|
||||
|
||||
#if defined(__arm__) && !defined(__aarch64__)
|
||||
/* Emulate vcvtnq_s32_f32() for ARMv7 Neon. */
|
||||
/* Converts to fixed point with 8 fractional bits, then drops those bits with
   a rounding right shift. */
static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
   const int32x4_t fixed_q8 = vcvtq_n_s32_f32(x, 8);
   return vrshrq_n_s32(fixed_q8, 8);
}
|
||||
|
||||
/* 128-bit pairwise add built from two 64-bit pairwise adds: the low half of
   the result holds the pair sums of a, the high half those of b. */
static OPUS_INLINE int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
   const int16x4_t sums_a = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
   const int16x4_t sums_b = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
   return vcombine_s16(sums_a, sums_b);
}
|
||||
|
||||
/* Widening multiply of the upper 8 bytes of a and b. */
static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) {
   const int8x8_t a_hi = vget_high_s8(a);
   const int8x8_t b_hi = vget_high_s8(b);
   return vmull_s8(a_hi, b_hi);
}
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
vmlaq_f32() into fmul/fadd. */
|
||||
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
|
||||
#endif
|
||||
|
||||
#ifndef LPCNET_TEST
|
||||
/* 4-wide exp() approximation.
   The input is clamped to [-88, 88], rewritten as a power of two, and split
   into integer and fractional parts. A cubic polynomial handles the
   fractional part and the integer part is written straight into the float
   exponent field. */
static inline float32x4_t exp4_approx(float32x4_t x) {
   const float32x4_t c0 = vdupq_n_f32(0.99992522f);
   const float32x4_t c1 = vdupq_n_f32(0.69583354f);
   const float32x4_t c2 = vdupq_n_f32(0.22606716f);
   const float32x4_t c3 = vdupq_n_f32(0.078024523f);
   int32x4_t ipart;
   float32x4_t frac, poly, pow2;

   x = vmaxq_f32(vminq_f32(x, vdupq_n_f32(88.f)), vdupq_n_f32(-88.f));

   /* express exp(x) as exp2(x/log(2)), add 127 for the exponent later */
   x = vmlaq_f32(vdupq_n_f32(127.f), x, vdupq_n_f32(1.44269504f));

   /* split into integer and fractional parts */
   ipart = vcvtq_s32_f32(x);
   frac = vsubq_f32(x, vcvtq_f32_s32(ipart));

   /* c0 + frac*(c1 + frac*(c2 + c3*frac)) */
   poly = vmlaq_f32(c0, frac, vmlaq_f32(c1, frac, vmlaq_f32(c2, c3, frac)));

   /* compute 2^i by shifting the biased exponent into place */
   pow2 = vreinterpretq_f32_s32(vshlq_n_s32(ipart, 23));

   return vmulq_f32(poly, pow2);
}
|
||||
|
||||
/* 4-wide tanh approximation using a rational function:
   tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [-1, 1]. The division is replaced by the
   reciprocal estimate vrecpeq_f32(). */
static inline float32x4_t tanh4_approx(float32x4_t X)
{
   const float32x4_t num0 = vdupq_n_f32(952.52801514f);
   const float32x4_t num1 = vdupq_n_f32(96.39235687f);
   const float32x4_t num2 = vdupq_n_f32(0.60863042f);
   const float32x4_t den0 = vdupq_n_f32(952.72399902f);
   const float32x4_t den1 = vdupq_n_f32(413.36801147f);
   const float32x4_t den2 = vdupq_n_f32(11.88600922f);
   const float32x4_t upper = vdupq_n_f32(1.f);
   const float32x4_t lower = vdupq_n_f32(-1.f);
   float32x4_t sq, p, q, y;

   sq = vmulq_f32(X, X);
   /* Horner evaluation of numerator and denominator in x^2. */
   p = vmlaq_f32(num0, sq, vmlaq_f32(num1, num2, sq));
   q = vmlaq_f32(den0, sq, vmlaq_f32(den1, den2, sq));
   p = vmulq_f32(p, X);
   q = vrecpeq_f32(q);
   y = vmulq_f32(p, q);
   return vmaxq_f32(lower, vminq_f32(upper, y));
}
|
||||
|
||||
/* 4-wide sigmoid approximation using a rational function:
   1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
   with the result clamped to [0, 1]. The division is replaced by the
   reciprocal estimate vrecpeq_f32(). */
static inline float32x4_t sigmoid4_approx(float32x4_t X)
{
   const float32x4_t num0 = vdupq_n_f32(238.13200378f);
   const float32x4_t num1 = vdupq_n_f32(6.02452230f);
   const float32x4_t num2 = vdupq_n_f32(0.00950985f);
   const float32x4_t den0 = vdupq_n_f32(952.72399902f);
   const float32x4_t den1 = vdupq_n_f32(103.34200287f);
   const float32x4_t den2 = vdupq_n_f32(0.74287558f);
   const float32x4_t offset = vdupq_n_f32(0.5f);
   const float32x4_t upper = vdupq_n_f32(1.f);
   const float32x4_t lower = vdupq_n_f32(0.f);
   float32x4_t sq, p, q, y;

   sq = vmulq_f32(X, X);
   /* Horner evaluation of numerator and denominator in x^2. */
   p = vmlaq_f32(num0, sq, vmlaq_f32(num1, num2, sq));
   q = vmlaq_f32(den0, sq, vmlaq_f32(den1, den2, sq));
   p = vmulq_f32(p, X);
   q = vrecpeq_f32(q);
   y = vmlaq_f32(offset, p, q);
   return vmaxq_f32(lower, vminq_f32(upper, y));
}
|
||||
|
||||
static inline float lpcnet_exp(float x)
|
||||
{
|
||||
float out[4];
|
||||
float32x4_t X, Y;
|
||||
X = vdupq_n_f32(x);
|
||||
Y = exp4_approx(X);
|
||||
vst1q_f32(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline float tanh_approx(float x)
|
||||
{
|
||||
float out[4];
|
||||
float32x4_t X, Y;
|
||||
X = vdupq_n_f32(x);
|
||||
Y = tanh4_approx(X);
|
||||
vst1q_f32(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline float sigmoid_approx(float x)
|
||||
{
|
||||
float out[4];
|
||||
float32x4_t X, Y;
|
||||
X = vdupq_n_f32(x);
|
||||
Y = sigmoid4_approx(X);
|
||||
vst1q_f32(out, Y);
|
||||
return out[0];
|
||||
}
|
||||
|
||||
static inline void softmax(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
float32x4_t X, Y;
|
||||
X = vld1q_f32(&x[i]);
|
||||
Y = exp4_approx(X);
|
||||
vst1q_f32(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
y[i] = lpcnet_exp(x[i]);
|
||||
}
|
||||
|
||||
static inline void vec_tanh(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
float32x4_t X, Y;
|
||||
X = vld1q_f32(&x[i]);
|
||||
Y = tanh4_approx(X);
|
||||
vst1q_f32(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
float ex2;
|
||||
ex2 = lpcnet_exp(2*x[i]);
|
||||
y[i] = (ex2-1)/(ex2+1);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vec_sigmoid(float *y, const float *x, int N)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<N-3;i+=4)
|
||||
{
|
||||
float32x4_t X, Y;
|
||||
X = vld1q_f32(&x[i]);
|
||||
Y = sigmoid4_approx(X);
|
||||
vst1q_f32(&y[i], Y);
|
||||
}
|
||||
for (;i<N;i++)
|
||||
{
|
||||
float ex;
|
||||
ex = lpcnet_exp(x[i]);
|
||||
y[i] = (ex)/(ex+1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0;i<rows;i+=16)
|
||||
{
|
||||
float * restrict y = &out[i];
|
||||
|
||||
/* keep y[0..15] in registers for duration of inner loop */
|
||||
|
||||
float32x4_t y0_3 = vdupq_n_f32(0);
|
||||
float32x4_t y4_7 = vdupq_n_f32(0);
|
||||
float32x4_t y8_11 = vdupq_n_f32(0);
|
||||
float32x4_t y12_15 = vdupq_n_f32(0);
|
||||
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
const float * restrict w;
|
||||
float32x4_t wvec0_3, wvec4_7, wvec8_11, wvec12_15;
|
||||
float32x4_t xj;
|
||||
|
||||
w = &weights[j*col_stride + i];
|
||||
wvec0_3 = vld1q_f32(&w[0]);
|
||||
wvec4_7 = vld1q_f32(&w[4]);
|
||||
wvec8_11 = vld1q_f32(&w[8]);
|
||||
wvec12_15 = vld1q_f32(&w[12]);
|
||||
|
||||
xj = vld1q_dup_f32(&x[j]);
|
||||
|
||||
y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
|
||||
y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
|
||||
y8_11 = vmlaq_f32(y8_11, wvec8_11, xj);
|
||||
y12_15 = vmlaq_f32(y12_15, wvec12_15, xj);
|
||||
}
|
||||
|
||||
/* save y[0..15] back to memory */
|
||||
|
||||
vst1q_f32(&y[0], y0_3);
|
||||
vst1q_f32(&y[4], y4_7);
|
||||
vst1q_f32(&y[8], y8_11);
|
||||
vst1q_f32(&y[12], y12_15);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
float * restrict y = &out[i];
|
||||
|
||||
/* keep y[0..15] in registers for duration of inner loop */
|
||||
|
||||
float32x4_t y0_3 = vdupq_n_f32(0);
|
||||
float32x4_t y4_7 = vdupq_n_f32(0);
|
||||
|
||||
for (j=0;j<cols;j++)
|
||||
{
|
||||
const float * restrict w;
|
||||
float32x4_t wvec0_3, wvec4_7;
|
||||
float32x4_t xj;
|
||||
|
||||
w = &weights[j*col_stride + i];
|
||||
wvec0_3 = vld1q_f32(&w[0]);
|
||||
wvec4_7 = vld1q_f32(&w[4]);
|
||||
|
||||
xj = vld1q_dup_f32(&x[j]);
|
||||
|
||||
y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
|
||||
y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
|
||||
}
|
||||
|
||||
/* save y[0..15] back to memory */
|
||||
|
||||
vst1q_f32(&y[0], y0_3);
|
||||
vst1q_f32(&y[4], y4_7);
|
||||
}
|
||||
}
|
||||
|
||||
/* Float matrix-vector product: out[i] = sum_j weights[j*col_stride + i]*x[j].
   Uses the 16-row kernel when rows is a multiple of 16, the 8-row kernel
   when it is a multiple of 8, and a plain scalar loop otherwise. */
static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
   int row, col;
   if ((rows&0xf) == 0) {
      sgemv16x1(out, weights, rows, cols, col_stride, x);
      return;
   }
   if ((rows&0x7) == 0) {
      sgemv8x1(out, weights, rows, cols, col_stride, x);
      return;
   }
   /* Generic fallback; accumulates directly in out[]. */
   for (row=0;row<rows;row++)
   {
      out[row] = 0;
      for (col=0;col<cols;col++) out[row] += weights[col*col_stride + row]*x[col];
   }
}
|
||||
|
||||
/* Temporarily use unoptimized version */
|
||||
/* Block-sparse float matrix-vector product with 8x4 blocks (scalar code).
   out[0..rows-1] is zeroed first. For each group of 8 output rows, idx
   supplies a block count followed by one input offset per block. Each block
   consumes 32 consecutive weights (4 columns of 8 rows) and the 4 inputs
   x[offset..offset+3]. */
static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
{
   int row, blk, c, r;
   /* Zero the output; the blocks below accumulate into it. */
   for (row=0;row<rows;row++) out[row] = 0;
   for (row=0;row<rows;row+=8)
   {
      const int nblocks = *idx++;
      for (blk=0;blk<nblocks;blk++)
      {
         float * restrict y;
         float xv[4];
         const int pos = (*idx++);
         /* Read the four inputs before touching the output. */
         xv[0] = x[pos+0];
         xv[1] = x[pos+1];
         xv[2] = x[pos+2];
         xv[3] = x[pos+3];
         y = &out[row];
         /* Column-major within the block: column c holds w[8*c .. 8*c+7]. */
         for (c=0;c<4;c++)
         {
            for (r=0;r<8;r++) y[r] += w[8*c + r]*xv[c];
         }
         w += 32;
      }
   }
}
|
||||
|
||||
|
||||
#define SCALE (128.f*127.f)
|
||||
#define SCALE_1 (1.f/128.f/127.f)
|
||||
|
||||
#define MAX_INPUTS 2048
|
||||
#define MAX_OUTPUTS 8192
|
||||
|
||||
#if __ARM_FEATURE_DOTPROD
|
||||
/* Signed 8-bit dot product accumulated into 32-bit lanes, mapped directly
   onto vdotq_s32(). */
static inline int32x4_t vdotprod(int32x4_t acc, int8x16_t a, int8x16_t b) {
   const int32x4_t sum = vdotq_s32(acc, a, b);
   return sum;
}
|
||||
#else
|
||||
/* Fallback for targets without the dot-product instruction: widening
   multiplies of the low and high byte halves, a 16-bit pairwise add, then a
   pairwise add-accumulate into the 32-bit lanes of acc. */
static inline int32x4_t vdotprod(int32x4_t acc, int8x16_t a, int8x16_t b)
{
   const int16x8_t prod_lo = vmull_s8(vget_low_s8(a), vget_low_s8(b));
   const int16x8_t prod_hi = vmull_high_s8(a, b);
   const int16x8_t pair_sums = vpaddq_s16(prod_lo, prod_hi);
   return vpadalq_s16(acc, pair_sums);
}
|
||||
#endif
|
||||
|
||||
static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
opus_int32 x_int[MAX_INPUTS/4];
|
||||
opus_int8 *x = (opus_int8*) x_int;
|
||||
const float32x4_t const127 = vdupq_n_f32(127.);
|
||||
for (i=0;i<cols;i+=8) {
|
||||
int32x4_t xi0, xi4;
|
||||
int16x8_t x_short;
|
||||
xi0 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i])));
|
||||
xi4 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i+4])));
|
||||
x_short = vcombine_s16(vmovn_s32(xi0), vmovn_s32(xi4));
|
||||
vst1_s8(&x[i], vmovn_s16(x_short));
|
||||
}
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
int32x4_t acc0, acc1;
|
||||
int32x4_t acc2, acc3;
|
||||
acc0 = vdupq_n_s32(0);
|
||||
acc1 = vdupq_n_s32(0);
|
||||
acc2 = vdupq_n_s32(0);
|
||||
acc3 = vdupq_n_s32(0);
|
||||
j=0;
|
||||
for (;j<cols-4;j+=8)
|
||||
{
|
||||
int8x16_t vw0, vw1, vw2, vw3, vx0, vx1;
|
||||
vx0 = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j]);
|
||||
vw0 = vld1q_s8(w);
|
||||
vw1 = vld1q_s8(&w[16]);
|
||||
acc0 = vdotprod(acc0, vw0, vx0);
|
||||
acc1 = vdotprod(acc1, vw1, vx0);
|
||||
vx1 = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j+4]);
|
||||
vw2 = vld1q_s8(&w[32]);
|
||||
vw3 = vld1q_s8(&w[48]);
|
||||
acc2 = vdotprod(acc2, vw2, vx1);
|
||||
acc3 = vdotprod(acc3, vw3, vx1);
|
||||
w += 64;
|
||||
}
|
||||
acc0 = vaddq_s32(acc0, acc2);
|
||||
acc1 = vaddq_s32(acc1, acc3);
|
||||
for (;j<cols;j+=4)
|
||||
{
|
||||
int8x16_t vw0, vw1, vx;
|
||||
vx = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j]);
|
||||
vw0 = vld1q_s8(w);
|
||||
vw1 = vld1q_s8(&w[16]);
|
||||
acc0 = vdotprod(acc0, vw0, vx);
|
||||
acc1 = vdotprod(acc1, vw1, vx);
|
||||
w += 32;
|
||||
}
|
||||
vst1q_f32(&_out[i], vmulq_f32(vld1q_f32(&scale[i]), vcvtq_f32_s32(acc0)));
|
||||
vst1q_f32(&_out[i+4], vmulq_f32(vld1q_f32(&scale[i+4]), vcvtq_f32_s32(acc1)));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
|
||||
{
|
||||
int i, j;
|
||||
opus_int32 x_int[MAX_INPUTS/4];
|
||||
opus_int8 *x = (opus_int8*) x_int;
|
||||
const float32x4_t const127 = vdupq_n_f32(127.);
|
||||
for (i=0;i<cols;i+=8) {
|
||||
int32x4_t xi0, xi4;
|
||||
int16x8_t x_short;
|
||||
xi0 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i])));
|
||||
xi4 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i+4])));
|
||||
x_short = vcombine_s16(vmovn_s32(xi0), vmovn_s32(xi4));
|
||||
vst1_s8(&x[i], vmovn_s16(x_short));
|
||||
}
|
||||
for (i=0;i<rows;i+=8)
|
||||
{
|
||||
int colblocks;
|
||||
int32x4_t acc0, acc1;
|
||||
acc0 = vdupq_n_s32(0);
|
||||
acc1 = vdupq_n_s32(0);
|
||||
colblocks = *idx++;
|
||||
for (j=0;j<colblocks;j++)
|
||||
{
|
||||
int pos;
|
||||
pos = (*idx++);
|
||||
int8x16_t vw0, vw1, vx;
|
||||
vx = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[pos]);
|
||||
vw0 = vld1q_s8(w);
|
||||
vw1 = vld1q_s8(&w[16]);
|
||||
acc0 = vdotprod(acc0, vw0, vx);
|
||||
acc1 = vdotprod(acc1, vw1, vx);
|
||||
w += 32;
|
||||
}
|
||||
vst1q_f32(&_out[i], vmulq_f32(vld1q_f32(&scale[i]), vcvtq_f32_s32(acc0)));
|
||||
vst1q_f32(&_out[i+4], vmulq_f32(vld1q_f32(&scale[i+4]), vcvtq_f32_s32(acc1)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#include "nnet.h"
|
||||
#include "os_support.h"
|
||||
#include "arch.h"
|
||||
|
||||
/* This is a bit of a hack because we need to build nnet_data.c and plc_data.c without USE_WEIGHTS_FILE,
|
||||
but USE_WEIGHTS_FILE is defined in config.h. */
|
||||
#undef HAVE_CONFIG_H
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
#undef USE_WEIGHTS_FILE
|
||||
#endif
|
||||
#include "pitchdnn_data.c"
|
||||
#include "fargan_data.c"
|
||||
#include "plc_data.c"
|
||||
#include "dred_rdovae_enc_data.c"
|
||||
#include "dred_rdovae_dec_data.c"
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "lace_data.c"
|
||||
#include "nolace_data.c"
|
||||
#endif
|
||||
|
||||
void write_weights(const WeightArray *list, FILE *fout)
|
||||
{
|
||||
int i=0;
|
||||
unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
|
||||
while (list[i].name != NULL) {
|
||||
WeightHead h;
|
||||
if (strlen(list[i].name) >= sizeof(h.name) - 1) {
|
||||
printf("[write_weights] warning: name %s too long\n", list[i].name);
|
||||
}
|
||||
memcpy(h.head, "DNNw", 4);
|
||||
h.version = WEIGHT_BLOB_VERSION;
|
||||
h.type = list[i].type;
|
||||
h.size = list[i].size;
|
||||
h.block_size = (h.size+WEIGHT_BLOCK_SIZE-1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
|
||||
OPUS_CLEAR(h.name, sizeof(h.name));
|
||||
strncpy(h.name, list[i].name, sizeof(h.name));
|
||||
h.name[sizeof(h.name)-1] = 0;
|
||||
celt_assert(sizeof(h) == WEIGHT_BLOCK_SIZE);
|
||||
fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
|
||||
fwrite(list[i].data, 1, h.size, fout);
|
||||
fwrite(zeros, 1, h.block_size-h.size, fout);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
FILE *fout = fopen("weights_blob.bin", "w");
|
||||
write_weights(pitchdnn_arrays, fout);
|
||||
write_weights(fargan_arrays, fout);
|
||||
write_weights(plcmodel_arrays, fout);
|
||||
write_weights(rdovaeenc_arrays, fout);
|
||||
write_weights(rdovaedec_arrays, fout);
|
||||
#ifdef ENABLE_OSCE
|
||||
#ifndef DISABLE_LACE
|
||||
write_weights(lacelayers_arrays, fout);
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
write_weights(nolacelayers_arrays, fout);
|
||||
#endif
|
||||
#endif
|
||||
fclose(fout);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/* Copyright (c) 2011-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DNN_X86_H
|
||||
#define DNN_X86_H
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2)
|
||||
void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_activation_sse2(float *output, const float *input, int N, int activation);
|
||||
void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_activation_sse4_1(float *output, const float *input, int N, int activation);
|
||||
void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_activation_avx2(float *output, const float *input, int N, int activation);
|
||||
void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_AVX2)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in))
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_avx2(output, input, N, activation))
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
#elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in))
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse4_1(output, input, N, activation))
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
#elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in))
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse2(output, input, N, activation))
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))
|
||||
|
||||
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) \
|
||||
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
|
||||
|
||||
|
||||
extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
|
||||
float *output,
|
||||
const float *input,
|
||||
int N,
|
||||
int activation
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) \
|
||||
((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
|
||||
|
||||
|
||||
extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const Conv2dLayer *conv,
|
||||
float *out,
|
||||
float *mem,
|
||||
const float *in,
|
||||
int height,
|
||||
int hstride,
|
||||
int activation
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
|
||||
((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* DNN_X86_H */
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86_arch_macros.h"
|
||||
|
||||
#ifndef __AVX2__
|
||||
#error nnet_avx2.c is being compiled without AVX2 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH avx2
|
||||
|
||||
#include "nnet_arch.h"
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86_arch_macros.h"
|
||||
|
||||
#ifndef __SSE2__
|
||||
#error nnet_sse2.c is being compiled without SSE2 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH sse2
|
||||
|
||||
#include "nnet_arch.h"
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86_arch_macros.h"
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
#error nnet_sse4_1.c is being compiled without SSE4.1 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH sse4_1
|
||||
|
||||
#include "nnet_arch.h"
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86cpu.h"
|
||||
#include "nnet.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_AVX2))
|
||||
|
||||
/* Run-time dispatch table for compute_linear(): one function pointer per CPU
   feature level, looked up with (arch & OPUS_ARCHMASK). */
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
) = {
|
||||
compute_linear_c, /* non-sse */
|
||||
compute_linear_c, /* NOTE(review): presumably the plain-SSE slot, served by the C version -- confirm against the arch numbering */
|
||||
MAY_HAVE_SSE2(compute_linear), /* sse2 */
|
||||
MAY_HAVE_SSE4_1(compute_linear), /* sse4.1 */
|
||||
MAY_HAVE_AVX2(compute_linear) /* avx2 */
|
||||
};
|
||||
|
||||
/* Run-time dispatch table for compute_activation(): one function pointer per
   CPU feature level, looked up with (arch & OPUS_ARCHMASK). */
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
|
||||
float *output,
|
||||
const float *input,
|
||||
int N,
|
||||
int activation
|
||||
) = {
|
||||
compute_activation_c, /* non-sse */
|
||||
compute_activation_c, /* NOTE(review): presumably the plain-SSE slot, served by the C version -- confirm against the arch numbering */
|
||||
MAY_HAVE_SSE2(compute_activation), /* sse2 */
|
||||
MAY_HAVE_SSE4_1(compute_activation), /* sse4.1 */
|
||||
MAY_HAVE_AVX2(compute_activation) /* avx2 */
|
||||
};
|
||||
|
||||
/* Run-time dispatch table for compute_conv2d(): one function pointer per CPU
   feature level, looked up with (arch & OPUS_ARCHMASK). */
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const Conv2dLayer *conv,
|
||||
float *out,
|
||||
float *mem,
|
||||
const float *in,
|
||||
int height,
|
||||
int hstride,
|
||||
int activation
|
||||
) = {
|
||||
compute_conv2d_c, /* non-sse */
|
||||
compute_conv2d_c, /* NOTE(review): presumably the plain-SSE slot, served by the C version -- confirm against the arch numbering */
|
||||
MAY_HAVE_SSE2(compute_conv2d), /* sse2 */
|
||||
MAY_HAVE_SSE4_1(compute_conv2d), /* sse4.1 */
|
||||
MAY_HAVE_AVX2(compute_conv2d) /* avx2 */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
DEEP_PLC_HEAD = \
|
||||
dnn/lpcnet.h \
|
||||
dnn/burg.h \
|
||||
dnn/common.h \
|
||||
dnn/freq.h \
|
||||
dnn/fargan.h \
|
||||
dnn/fargan_data.h \
|
||||
dnn/lpcnet_private.h \
|
||||
dnn/nnet.h \
|
||||
dnn/plc_data.h \
|
||||
dnn/vec.h \
|
||||
dnn/vec_avx.h \
|
||||
dnn/vec_neon.h \
|
||||
dnn/pitchdnn.h \
|
||||
dnn/pitchdnn_data.h \
|
||||
dnn/x86/dnn_x86.h \
|
||||
dnn/nnet_arch.h \
|
||||
dnn/arm/dnn_arm.h
|
||||
|
||||
DRED_HEAD = \
|
||||
dnn/dred_coding.h \
|
||||
dnn/dred_config.h \
|
||||
dnn/dred_decoder.h \
|
||||
dnn/dred_encoder.h \
|
||||
dnn/dred_rdovae.h \
|
||||
dnn/dred_rdovae_constants.h \
|
||||
dnn/dred_rdovae_enc.h \
|
||||
dnn/dred_rdovae_enc_data.h \
|
||||
dnn/dred_rdovae_dec.h \
|
||||
dnn/dred_rdovae_dec_data.h \
|
||||
dnn/dred_rdovae_stats_data.h
|
||||
|
||||
OSCE_HEAD= \
|
||||
dnn/osce.h \
|
||||
dnn/osce_config.h \
|
||||
dnn/osce_structs.h \
|
||||
dnn/osce_features.h \
|
||||
dnn/nndsp.h \
|
||||
dnn/lace_data.h \
|
||||
dnn/nolace_data.h
|
||||
|
||||
LOSSGEN_HEAD = \
|
||||
dnn/lossgen.h \
|
||||
dnn/lossgen_data.h
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
DEEP_PLC_SOURCES = \
|
||||
dnn/burg.c \
|
||||
dnn/freq.c \
|
||||
dnn/fargan.c \
|
||||
dnn/fargan_data.c \
|
||||
dnn/lpcnet_enc.c \
|
||||
dnn/lpcnet_plc.c \
|
||||
dnn/lpcnet_tables.c \
|
||||
dnn/nnet.c \
|
||||
dnn/nnet_default.c \
|
||||
dnn/plc_data.c \
|
||||
dnn/parse_lpcnet_weights.c \
|
||||
dnn/pitchdnn.c \
|
||||
dnn/pitchdnn_data.c
|
||||
|
||||
DRED_SOURCES = \
|
||||
dnn/dred_rdovae_enc.c \
|
||||
dnn/dred_rdovae_enc_data.c \
|
||||
dnn/dred_rdovae_dec.c \
|
||||
dnn/dred_rdovae_dec_data.c \
|
||||
dnn/dred_rdovae_stats_data.c \
|
||||
dnn/dred_encoder.c \
|
||||
dnn/dred_coding.c \
|
||||
dnn/dred_decoder.c
|
||||
|
||||
OSCE_SOURCES = \
|
||||
dnn/osce.c \
|
||||
dnn/osce_features.c \
|
||||
dnn/nndsp.c \
|
||||
dnn/lace_data.c \
|
||||
dnn/nolace_data.c
|
||||
|
||||
LOSSGEN_SOURCES = \
|
||||
dnn/lossgen.c \
|
||||
dnn/lossgen_data.c
|
||||
|
||||
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
|
||||
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
|
||||
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
|
||||
DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c
|
||||
|
||||
DNN_SOURCES_ARM_RTCD = dnn/arm/arm_dnn_map.c
|
||||
DNN_SOURCES_DOTPROD = dnn/arm/nnet_dotprod.c
|
||||
DNN_SOURCES_NEON = dnn/arm/nnet_neon.c
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
2023 Amazon
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "SigProc_FLP.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
|
||||
/* inner product of two silk_float arrays, with result as double */
|
||||
double silk_inner_product_FLP_avx2(
|
||||
const silk_float *data1,
|
||||
const silk_float *data2,
|
||||
opus_int dataSize
|
||||
)
|
||||
{
|
||||
opus_int i;
|
||||
__m256d accum1, accum2;
|
||||
double result;
|
||||
|
||||
/* 4x unrolled loop */
|
||||
result = 0.0;
|
||||
accum1 = accum2 = _mm256_setzero_pd();
|
||||
for( i = 0; i < dataSize - 7; i += 8 ) {
|
||||
__m128 x1f, x2f;
|
||||
__m256d x1d, x2d;
|
||||
x1f = _mm_loadu_ps( &data1[ i ] );
|
||||
x2f = _mm_loadu_ps( &data2[ i ] );
|
||||
x1d = _mm256_cvtps_pd( x1f );
|
||||
x2d = _mm256_cvtps_pd( x2f );
|
||||
accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 );
|
||||
x1f = _mm_loadu_ps( &data1[ i + 4 ] );
|
||||
x2f = _mm_loadu_ps( &data2[ i + 4 ] );
|
||||
x1d = _mm256_cvtps_pd( x1f );
|
||||
x2d = _mm256_cvtps_pd( x2f );
|
||||
accum2 = _mm256_fmadd_pd( x1d, x2d, accum2 );
|
||||
}
|
||||
for( ; i < dataSize - 3; i += 4 ) {
|
||||
__m128 x1f, x2f;
|
||||
__m256d x1d, x2d;
|
||||
x1f = _mm_loadu_ps( &data1[ i ] );
|
||||
x2f = _mm_loadu_ps( &data2[ i ] );
|
||||
x1d = _mm256_cvtps_pd( x1f );
|
||||
x2d = _mm256_cvtps_pd( x2f );
|
||||
accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 );
|
||||
}
|
||||
accum1 = _mm256_add_pd(accum1, accum2);
|
||||
accum1 = _mm256_add_pd(accum1, _mm256_permute2f128_pd(accum1, accum1, 1));
|
||||
accum1 = _mm256_hadd_pd(accum1,accum1);
|
||||
result = _mm256_cvtsd_f64(accum1);
|
||||
|
||||
/* add any remaining products */
|
||||
for( ; i < dataSize; i++ ) {
|
||||
result += data1[ i ] * (double)data2[ i ];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,315 @@
|
|||
/* Copyright (c) 2022 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "opus_defines.h"
|
||||
#include "arch.h"
|
||||
#include "os_support.h"
|
||||
#include "opus_private.h"
|
||||
|
||||
|
||||
/* Given an extension payload, advance data to the next extension and return the
|
||||
length of the remaining extensions. */
|
||||
opus_int32 skip_extension(const unsigned char **data, opus_int32 len, opus_int32 *header_size)
|
||||
{
|
||||
int id, L;
|
||||
if (len==0)
|
||||
return 0;
|
||||
id = **data>>1;
|
||||
L = **data&1;
|
||||
if (id == 0 && L == 1)
|
||||
{
|
||||
*header_size = 1;
|
||||
if (len < 1)
|
||||
return -1;
|
||||
(*data)++;
|
||||
len--;
|
||||
return len;
|
||||
} else if (id > 0 && id < 32)
|
||||
{
|
||||
if (len < 1+L)
|
||||
return -1;
|
||||
*data += 1+L;
|
||||
len -= 1+L;
|
||||
*header_size = 1;
|
||||
return len;
|
||||
} else {
|
||||
if (L==0)
|
||||
{
|
||||
*data += len;
|
||||
*header_size = 1;
|
||||
return 0;
|
||||
} else {
|
||||
opus_int32 bytes=0;
|
||||
*header_size = 1;
|
||||
do {
|
||||
(*data)++;
|
||||
len--;
|
||||
if (len == 0)
|
||||
return -1;
|
||||
bytes += **data;
|
||||
(*header_size)++;
|
||||
} while (**data == 255);
|
||||
(*data)++;
|
||||
len--;
|
||||
if (bytes <= len)
|
||||
{
|
||||
len -= bytes;
|
||||
*data += bytes;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Count the number of extensions, excluding real padding and separators. */
|
||||
opus_int32 opus_packet_extensions_count(const unsigned char *data, opus_int32 len)
|
||||
{
|
||||
opus_int32 curr_len;
|
||||
opus_int32 count=0;
|
||||
const unsigned char *curr_data = data;
|
||||
|
||||
celt_assert(len >= 0);
|
||||
celt_assert(data != NULL || len == 0);
|
||||
|
||||
curr_len = len;
|
||||
while (curr_len > 0)
|
||||
{
|
||||
int id;
|
||||
opus_int32 header_size;
|
||||
id = *curr_data>>1;
|
||||
curr_len = skip_extension(&curr_data, curr_len, &header_size);
|
||||
if (curr_len < 0)
|
||||
return OPUS_INVALID_PACKET;
|
||||
if (id > 1)
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Extract extensions from Opus padding (excluding real padding and separators) */
|
||||
opus_int32 opus_packet_extensions_parse(const unsigned char *data, opus_int32 len, opus_extension_data *extensions, opus_int32 *nb_extensions)
|
||||
{
|
||||
const unsigned char *curr_data;
|
||||
opus_int32 curr_len;
|
||||
int curr_frame=0;
|
||||
opus_int32 count=0;
|
||||
|
||||
celt_assert(len >= 0);
|
||||
celt_assert(data != NULL || len == 0);
|
||||
celt_assert(nb_extensions != NULL);
|
||||
celt_assert(extensions != NULL || *nb_extensions == 0);
|
||||
|
||||
curr_data = data;
|
||||
curr_len = len;
|
||||
while (curr_len > 0)
|
||||
{
|
||||
int id;
|
||||
opus_int32 header_size;
|
||||
opus_extension_data curr_ext;
|
||||
id = *curr_data>>1;
|
||||
if (id > 1)
|
||||
{
|
||||
curr_ext.id = id;
|
||||
curr_ext.frame = curr_frame;
|
||||
curr_ext.data = curr_data;
|
||||
} else if (id == 1)
|
||||
{
|
||||
int L = *curr_data&1;
|
||||
if (L==0)
|
||||
curr_frame++;
|
||||
else {
|
||||
if (curr_len >= 2)
|
||||
curr_frame += curr_data[1];
|
||||
/* Else we're at the end and it doesn't matter. */
|
||||
}
|
||||
if (curr_frame >= 48)
|
||||
{
|
||||
*nb_extensions = count;
|
||||
return OPUS_INVALID_PACKET;
|
||||
}
|
||||
}
|
||||
curr_len = skip_extension(&curr_data, curr_len, &header_size);
|
||||
/* printf("curr_len = %d, header_size = %d\n", curr_len, header_size); */
|
||||
if (curr_len < 0)
|
||||
{
|
||||
*nb_extensions = count;
|
||||
return OPUS_INVALID_PACKET;
|
||||
}
|
||||
celt_assert(curr_data - data == len - curr_len);
|
||||
if (id > 1)
|
||||
{
|
||||
if (count == *nb_extensions)
|
||||
{
|
||||
return OPUS_BUFFER_TOO_SMALL;
|
||||
}
|
||||
curr_ext.len = curr_data - curr_ext.data - header_size;
|
||||
curr_ext.data += header_size;
|
||||
extensions[count++] = curr_ext;
|
||||
}
|
||||
}
|
||||
celt_assert(curr_len == 0);
|
||||
*nb_extensions = count;
|
||||
return OPUS_OK;
|
||||
}
|
||||
|
||||
/* Serialize nb_extensions extensions into at most len bytes at data.
   Extensions are emitted grouped by increasing frame index, with a frame
   separator inserted whenever the frame index changes.
   data:  output buffer; may be NULL, in which case nothing is written and
          only the required size is computed.
   pad:   if non-zero and the output is shorter than len, 0x01 bytes are
          prepended so that exactly len bytes are produced.
   Returns the number of bytes produced, OPUS_BAD_ARG for an invalid
   extension (ID outside 2..127, frame outside 0..47, negative length, or a
   short extension longer than 1 byte), or OPUS_BUFFER_TOO_SMALL if the
   output does not fit in len bytes. */
opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, opus_int32 nb_extensions, int pad)
{
   int max_frame=0;
   opus_int32 i;
   int frame;
   int curr_frame = 0;
   opus_int32 pos = 0;
   opus_int32 written = 0;

   celt_assert(len >= 0);

   for (i=0;i<nb_extensions;i++)
   {
      /* A negative frame index never matches the frame loop below: the
         extension would be silently dropped and the "last extension"
         detection would never trigger. Reject it explicitly. */
      if (extensions[i].frame < 0)
         return OPUS_BAD_ARG;
      max_frame = IMAX(max_frame, extensions[i].frame);
      if (extensions[i].id < 2 || extensions[i].id > 127)
         return OPUS_BAD_ARG;
   }
   if (max_frame >= 48) return OPUS_BAD_ARG;
   for (frame=0;frame<=max_frame;frame++)
   {
      for (i=0;i<nb_extensions;i++)
      {
         if (extensions[i].frame == frame)
         {
            /* Insert separator when needed. */
            if (frame != curr_frame) {
               int diff = frame - curr_frame;
               if (len-pos < 2)
                  return OPUS_BUFFER_TOO_SMALL;
               if (diff == 1) {
                  /* ID 1, L=0: advance by one frame. */
                  if (data) data[pos] = 0x02;
                  pos++;
               } else {
                  /* ID 1, L=1: advance by the count in the next byte. */
                  if (data) data[pos] = 0x03;
                  pos++;
                  if (data) data[pos] = diff;
                  pos++;
               }
               curr_frame = frame;
            }
            if (extensions[i].id < 32)
            {
               /* Short extension: the L bit is the payload length (0 or 1). */
               if (extensions[i].len < 0 || extensions[i].len > 1)
                  return OPUS_BAD_ARG;
               if (len-pos < extensions[i].len+1)
                  return OPUS_BUFFER_TOO_SMALL;
               if (data) data[pos] = (extensions[i].id<<1) + extensions[i].len;
               pos++;
               if (extensions[i].len > 0) {
                  if (data) data[pos] = extensions[i].data[0];
                  pos++;
               }
            } else {
               int last;
               opus_int32 length_bytes;
               if (extensions[i].len < 0)
                  return OPUS_BAD_ARG;
               /* The last extension written needs no length field (L=0): it
                  simply runs to the end of the buffer. */
               last = (written == nb_extensions - 1);
               length_bytes = 1 + extensions[i].len/255;
               if (last)
                  length_bytes = 0;
               /* Same test as "len-pos < 1 + length_bytes + ext.len", arranged
                  so that a huge ext.len cannot overflow the addition. */
               if (extensions[i].len > len-pos - 1 - length_bytes)
                  return OPUS_BUFFER_TOO_SMALL;
               if (data) data[pos] = (extensions[i].id<<1) + !last;
               pos++;
               if (!last)
               {
                  opus_int32 j;
                  /* Length is coded as len/255 bytes of 255 followed by len%255. */
                  for (j=0;j<extensions[i].len/255;j++) {
                     if (data) data[pos] = 255;
                     pos++;
                  }
                  if (data) data[pos] = extensions[i].len % 255;
                  pos++;
               }
               /* Skip the copy for empty payloads, whose data pointer may be NULL. */
               if (data && extensions[i].len > 0)
                  OPUS_COPY(&data[pos], extensions[i].data, extensions[i].len);
               pos += extensions[i].len;
            }
            written++;
         }
      }
   }
   /* If we need to pad, just prepend 0x01 bytes. Even better would be to fill the
      end with zeros, but that requires checking that turning the last extension into
      an L=1 case still fits. */
   if (pad && pos < len)
   {
      opus_int32 padding = len - pos;
      if (data) {
         OPUS_MOVE(data+padding, data, pos);
         for (i=0;i<padding;i++)
            data[i] = 0x01;
      }
      pos += padding;
   }
   return pos;
}
|
||||
|
||||
#if 0
|
||||
#include <stdio.h>
|
||||
int main()
|
||||
{
|
||||
opus_extension_data ext[] = {{2, 0, (const unsigned char *)"a", 1},
|
||||
{32, 10, (const unsigned char *)"DRED", 4},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0}
|
||||
};
|
||||
opus_extension_data ext2[10];
|
||||
int i, len;
|
||||
int nb_ext = 10;
|
||||
unsigned char packet[10000];
|
||||
len = opus_packet_extensions_generate(packet, 32, ext, 4, 1);
|
||||
for (i=0;i<len;i++)
|
||||
{
|
||||
printf("%#04x ", packet[i]);
|
||||
if (i%16 == 15)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
printf("count = %d\n", opus_packet_extensions_count(packet, len));
|
||||
opus_packet_extensions_parse(packet, len, ext2, &nb_ext);
|
||||
for (i=0;i<nb_ext;i++)
|
||||
{
|
||||
int j;
|
||||
printf("%d %d {", ext2[i].id, ext2[i].frame);
|
||||
for (j=0;j<ext2[i].len;j++) printf("%#04x ", ext2[i].data[j]);
|
||||
printf("} %d\n", ext2[i].len);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Michael Klingbeil */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include <process.h>
|
||||
#define getpid _getpid
|
||||
#endif
|
||||
|
||||
/* including sources directly to test internal APIs */
|
||||
#define CELT_C /* to make celt_assert work */
|
||||
#include "opus.h"
|
||||
#include "test_opus_common.h"
|
||||
|
||||
|
||||
|
||||
#define NB_RANDOM_EXTENSIONS 10000000
|
||||
#define MAX_EXTENSION_SIZE 200
|
||||
#define MAX_NB_EXTENSIONS 100
|
||||
|
||||
void test_random_dred(void)
|
||||
{
|
||||
int error;
|
||||
int i;
|
||||
OpusDREDDecoder *dred_dec;
|
||||
OpusDRED *dred;
|
||||
dred_dec = opus_dred_decoder_create(&error);
|
||||
expect_true(error == OPUS_OK, "opus_dred_decoder_create() failed");
|
||||
dred = opus_dred_alloc(&error);
|
||||
expect_true(error == OPUS_OK, "opus_dred_create() failed");
|
||||
for (i=0;i<NB_RANDOM_EXTENSIONS;i++)
|
||||
{
|
||||
unsigned char payload[MAX_EXTENSION_SIZE];
|
||||
int len;
|
||||
int j;
|
||||
int res1, res2;
|
||||
int dred_end;
|
||||
len = fast_rand()%(MAX_EXTENSION_SIZE+1);
|
||||
for (j=0;j<len;j++)
|
||||
payload[j] = fast_rand()&0xFF;
|
||||
res1 = opus_dred_parse(dred_dec, dred, payload, len, 48000, 48000, &dred_end, fast_rand()&0x1);
|
||||
if (res1 > 0)
|
||||
{
|
||||
res2 = opus_dred_process(dred_dec, dred, dred);
|
||||
expect_true(res2 == OPUS_OK, "process should succeed if parse succeeds");
|
||||
expect_true(res1 >= dred_end, "end before beginning");
|
||||
}
|
||||
}
|
||||
opus_dred_free(dred);
|
||||
opus_dred_decoder_destroy(dred_dec);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int env_used;
|
||||
char *env_seed;
|
||||
env_used=0;
|
||||
env_seed=getenv("SEED");
|
||||
if(argc>1)iseed=atoi(argv[1]);
|
||||
else if(env_seed)
|
||||
{
|
||||
iseed=atoi(env_seed);
|
||||
env_used=1;
|
||||
}
|
||||
else iseed=(opus_uint32)time(NULL)^(((opus_uint32)getpid()&65535)<<16);
|
||||
Rw=Rz=iseed;
|
||||
|
||||
fprintf(stderr,"Testing dred. Random seed: %u (%.4X)\n", iseed, fast_rand() % 65535);
|
||||
if(env_used)fprintf(stderr," Random seed set from the environment (SEED=%s).\n", env_seed);
|
||||
|
||||
test_random_dred();
|
||||
fprintf(stderr,"Tests completed successfully.\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,450 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Michael Klingbeil */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include <process.h>
|
||||
#define getpid _getpid
|
||||
#endif
|
||||
|
||||
#include "../src/opus_private.h"
|
||||
#include "test_opus_common.h"
|
||||
|
||||
void test_extensions_generate_success(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{2, 0, (const unsigned char *)"a", 1},
|
||||
{32, 10, (const unsigned char *)"DRED", 4},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0}
|
||||
};
|
||||
|
||||
int result;
|
||||
unsigned char packet[32];
|
||||
const unsigned char *p = packet;
|
||||
result = opus_packet_extensions_generate(packet, 23+4, ext, 4, 1);
|
||||
expect_true(result == 23+4, "expected length 23+4");
|
||||
|
||||
/* expect padding */
|
||||
expect_true(p[0] == 1 && p[1] == 1 && p[2] == 1 && p[3] == 1, "expected padding");
|
||||
p += 4;
|
||||
|
||||
/* extension ID=2 */
|
||||
expect_true((p[0] >> 1) == 2, "expected extension id 2");
|
||||
/* For extension IDs 1 through 31, L=0 means that no data follows the
|
||||
extension, whereas L=1 means that exactly one byte of extension data follows. */
|
||||
expect_true((p[0] & 0x01) == 1, "expected L-bit set");
|
||||
/* content */
|
||||
expect_true(p[1] == 'a', "expected extension content");
|
||||
p += 2;
|
||||
|
||||
/* next byte should increment the frame count, ID=1, L=0 */
|
||||
expect_true(p[0] == 0x02, "bad frame separator");
|
||||
p += 1;
|
||||
/* extension ID=33 */
|
||||
expect_true((p[0] >> 1) == 33, "expected extension id 33");
|
||||
/* For IDs 32 to 127, L=0 signals that the extension data takes up the
|
||||
rest of the padding, and L=1 signals that a length indicator follows. */
|
||||
expect_true((p[0] & 0x01) == 1, "expected L-bit set");
|
||||
/* content */
|
||||
expect_true(p[1] == ext[2].len, "expected length");
|
||||
p += 2;
|
||||
expect_true(0 == memcmp(p, ext[2].data, ext[2].len), "expected extension content");
|
||||
p += ext[2].len;
|
||||
|
||||
/* advance to frame 4, increment by 3 */
|
||||
/* next byte should increment the frame count, ID=1, L=1 */
|
||||
expect_true(p[0] == 0x03, "bad frame separator");
|
||||
expect_true(p[1] == 0x03, "bad frame increment");
|
||||
p += 2;
|
||||
/* extension ID=3 */
|
||||
expect_true((p[0] >> 1) == 3, "expected extension id 3");
|
||||
/* For extension IDs 1 through 31, L=0 means that no data follows the
|
||||
extension, whereas L=1 means that exactly one byte of extension data follows. */
|
||||
expect_true((p[0] & 0x01) == 0, "expected L-bit unset");
|
||||
p += 1;
|
||||
|
||||
/* advance to frame 10, increment by 6 */
|
||||
/* next byte should increment the frame count, ID=1, L=1 */
|
||||
expect_true(p[0] == 0x03, "bad frame separator");
|
||||
expect_true(p[1] == 0x06, "bad frame increment");
|
||||
p += 2;
|
||||
/* extension ID=32 */
|
||||
expect_true((p[0] >> 1) == 32, "expected extension id 32");
|
||||
/* For IDs 32 to 127, L=0 signals that the extension data takes up the
|
||||
rest of the padding */
|
||||
expect_true((p[0] & 0x01) == 0, "expected L-bit unset");
|
||||
p += 1;
|
||||
expect_true(0 == memcmp(p, ext[1].data, ext[1].len), "expected extension content");
|
||||
}
|
||||
|
||||
/* Generating zero extensions into a zero-length buffer must report a
   length of 0. */
void test_extensions_generate_zero(void)
{
   unsigned char packet[32];
   int written;

   /* No room and nothing to write. */
   written = opus_packet_extensions_generate(packet, 0, NULL, 0, 1);
   expect_true(written == 0, "expected length 0");
}
|
||||
|
||||
void test_extensions_generate_no_padding(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{2, 0, (const unsigned char *)"a", 1},
|
||||
{32, 10, (const unsigned char *)"DRED", 4},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0}
|
||||
};
|
||||
|
||||
int result;
|
||||
unsigned char packet[32];
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), ext, 4, 0);
|
||||
expect_true(result == 23, "expected length 23");
|
||||
}
|
||||
|
||||
void test_extensions_generate_fail(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{2, 0, (const unsigned char *)"a", 1},
|
||||
{32, 10, (const unsigned char *)"DRED", 4},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0}
|
||||
};
|
||||
|
||||
int result;
|
||||
unsigned char packet[100];
|
||||
|
||||
/* buffer too small */
|
||||
result = opus_packet_extensions_generate(packet, 4, ext, 4, 1);
|
||||
expect_true(result == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");
|
||||
|
||||
/* invalid id */
|
||||
{
|
||||
static const opus_extension_data id_too_big[] = {
|
||||
{256, 0, (const unsigned char *)"a", 1},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), id_too_big, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
|
||||
/* invalid id */
|
||||
{
|
||||
static const opus_extension_data id_too_small[] = {
|
||||
{1, 0, (const unsigned char *)"a", 1},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), id_too_small, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
|
||||
/* frame index too big */
|
||||
{
|
||||
static const opus_extension_data frame_too_big[] = {
|
||||
{33, 48, (const unsigned char *)"a", 1},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), frame_too_big, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
|
||||
/* size too big for extension IDs 1 through 31 */
|
||||
{
|
||||
static const opus_extension_data size_too_big[] = {
|
||||
{2, 0, (const unsigned char *)"abcd", 4},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), size_too_big, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
|
||||
/* negative size for extension IDs 1 through 31 */
|
||||
{
|
||||
static const opus_extension_data neg_size[] = {
|
||||
{2, 0, NULL, -4},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), neg_size, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
|
||||
/* negative size for extension IDs 32 through 127 */
|
||||
{
|
||||
static const opus_extension_data neg_size_33[] = {
|
||||
{33, 0, NULL, -4},
|
||||
};
|
||||
result = opus_packet_extensions_generate(packet, sizeof(packet), neg_size_33, 1, 1);
|
||||
expect_true(result == OPUS_BAD_ARG, "expected OPUS_BAD_ARG");
|
||||
}
|
||||
}
|
||||
|
||||
void test_extensions_parse_success(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{2, 0, (const unsigned char *)"a", 1},
|
||||
{32, 10, (const unsigned char *)"DRED", 4},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0}
|
||||
};
|
||||
opus_extension_data ext_out[10];
|
||||
int nb_ext;
|
||||
int len, result;
|
||||
unsigned char packet[32];
|
||||
|
||||
nb_ext = 10;
|
||||
len = opus_packet_extensions_generate(packet, 32, ext, 4, 1);
|
||||
expect_true(len == 32, "expected length 32");
|
||||
result = opus_packet_extensions_count(packet, len);
|
||||
expect_true(result == 4, "expected opus_packet_extensions_count 4");
|
||||
result = opus_packet_extensions_parse(packet, len, ext_out, &nb_ext);
|
||||
expect_true(nb_ext == 4, "expected 4 extensions");
|
||||
|
||||
expect_true(ext_out[0].id == 2, "expected id 2");
|
||||
expect_true(ext_out[0].frame == 0, "expected frame 0");
|
||||
expect_true(ext_out[0].len == 1, "expected len 1");
|
||||
expect_true(0 == memcmp(ext_out[0].data, ext[0].data, 1), "expected data");
|
||||
|
||||
expect_true(ext_out[1].id == 33, "expected id 33");
|
||||
expect_true(ext_out[1].frame == 1, "expected frame 1");
|
||||
expect_true(ext_out[1].len == 8, "expected len 8");
|
||||
expect_true(0 == memcmp(ext_out[1].data, ext[2].data, 8), "expected data");
|
||||
|
||||
expect_true(ext_out[2].id == 3, "expected id 3");
|
||||
expect_true(ext_out[2].frame == 4, "expected frame 4");
|
||||
expect_true(ext_out[2].len == 0, "expected len 0");
|
||||
|
||||
expect_true(ext_out[3].id == 32, "expected id 32");
|
||||
expect_true(ext_out[3].frame == 10, "expected frame 10");
|
||||
expect_true(ext_out[3].len == 4, "expected len 4");
|
||||
expect_true(0 == memcmp(ext_out[3].data, ext[1].data, 4), "expected data");
|
||||
}
|
||||
|
||||
void test_extensions_parse_zero(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{32, 1, (const unsigned char *)"DRED", 4},
|
||||
};
|
||||
int nb_ext;
|
||||
int len, result;
|
||||
unsigned char packet[32];
|
||||
|
||||
len = opus_packet_extensions_generate(packet, 32, ext, 1, 1);
|
||||
expect_true(len == 32, "expected length 32");
|
||||
|
||||
nb_ext = 0;
|
||||
result = opus_packet_extensions_parse(packet, len, NULL, &nb_ext);
|
||||
expect_true(result == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");
|
||||
}
|
||||
|
||||
void test_extensions_parse_fail(void)
|
||||
{
|
||||
static const opus_extension_data ext[] = {
|
||||
{2, 0, (const unsigned char *)"a", 1},
|
||||
{33, 1, (const unsigned char *)"NOT DRED", 8},
|
||||
{3, 4, (const unsigned char *)NULL, 0},
|
||||
{32, 10, (const unsigned char *)"DRED", 4}
|
||||
};
|
||||
opus_extension_data ext_out[10];
|
||||
int nb_ext;
|
||||
int len, result;
|
||||
unsigned char packet[32];
|
||||
|
||||
/* create invalid length */
|
||||
len = opus_packet_extensions_generate(packet, sizeof(packet), ext, 4, 0);
|
||||
packet[4] = 255;
|
||||
nb_ext = 10;
|
||||
result = opus_packet_extensions_parse(packet, len, ext_out, &nb_ext);
|
||||
expect_true(result == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");
|
||||
result = opus_packet_extensions_count(packet, len);
|
||||
expect_true(result == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");
|
||||
|
||||
/* create invalid frame increment */
|
||||
nb_ext = 10;
|
||||
len = opus_packet_extensions_generate(packet, sizeof(packet), ext, 4, 0);
|
||||
packet[14] = 255;
|
||||
result = opus_packet_extensions_parse(packet, len, ext_out, &nb_ext);
|
||||
expect_true(result == OPUS_INVALID_PACKET, "expected OPUS_INVALID_PACKET");
|
||||
/* note, opus_packet_extensions_count does not read the invalid frame increment
|
||||
and tells us that we have 4 extensions */
|
||||
result = opus_packet_extensions_count(packet, len);
|
||||
expect_true(result == 4, "expected opus_packet_extensions_count to return 4");
|
||||
|
||||
/* not enough space */
|
||||
nb_ext = 1;
|
||||
len = opus_packet_extensions_generate(packet, sizeof(packet), ext, 4, 0);
|
||||
result = opus_packet_extensions_parse(packet, len, ext_out, &nb_ext);
|
||||
expect_true(result == OPUS_BUFFER_TOO_SMALL, "expected OPUS_BUFFER_TOO_SMALL");
|
||||
}
|
||||
|
||||
#define NB_RANDOM_EXTENSIONS 100000000
|
||||
#define MAX_EXTENSION_SIZE 200
|
||||
#define MAX_NB_EXTENSIONS 100
|
||||
|
||||
void test_random_extensions_parse(void)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<NB_RANDOM_EXTENSIONS;i++)
|
||||
{
|
||||
opus_extension_data ext_out[MAX_NB_EXTENSIONS];
|
||||
int nb_ext;
|
||||
unsigned char payload[MAX_EXTENSION_SIZE];
|
||||
int len;
|
||||
int j;
|
||||
int result;
|
||||
len = fast_rand()%(MAX_EXTENSION_SIZE+1);
|
||||
for (j=0;j<len;j++)
|
||||
payload[j] = fast_rand()&0xFF;
|
||||
nb_ext = fast_rand()%(MAX_NB_EXTENSIONS+1);
|
||||
result = opus_packet_extensions_parse(payload, len, ext_out, &nb_ext);
|
||||
expect_true(result == OPUS_OK || result == OPUS_BUFFER_TOO_SMALL || result == OPUS_INVALID_PACKET, "expected OPUS_OK, OPUS_BUFFER_TOO_SMALL or OPUS_INVALID_PACKET");
|
||||
/* Even if parsing fails, check that the extensions that got extracted make sense. */
|
||||
for (j=0;j<nb_ext;j++)
|
||||
{
|
||||
expect_true(ext_out[j].frame >= 0 && ext_out[j].frame < 48, "expected frame between 0 and 47");
|
||||
expect_true(ext_out[j].id >= 2 && ext_out[j].id <= 127, "expected id between 2 and 127");
|
||||
expect_true(ext_out[j].data >= payload && ext_out[j].data+ext_out[j].len <= payload+len, "expected data to be within packet");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void test_opus_repacketizer_out_range_impl(void)
|
||||
{
|
||||
OpusRepacketizer rp;
|
||||
unsigned char packet[1024];
|
||||
unsigned char packet_out[1024];
|
||||
opus_int16 size[48];
|
||||
const unsigned char *padding;
|
||||
opus_int32 padding_len;
|
||||
opus_extension_data ext_out[10];
|
||||
int i;
|
||||
int nb_ext;
|
||||
int res, len;
|
||||
int first_count = 0, second_count = 0;
|
||||
static const opus_extension_data ext[] = {
|
||||
{33, 0, (const unsigned char *)"abcdefg", 7},
|
||||
{100, 0, (const unsigned char *)"uvwxyz", 6},
|
||||
};
|
||||
|
||||
opus_repacketizer_init(&rp);
|
||||
|
||||
memset(packet, 0, sizeof(packet));
|
||||
/* Hybrid Packet with 20 msec frames, Code 3 */
|
||||
packet[0] = (15 << 3) | 3;
|
||||
/* Code 3, padding bit set, 1 frame */
|
||||
packet[1] = 1 << 6 | 1;
|
||||
packet[2] = 0;
|
||||
packet[3] = 0;
|
||||
|
||||
/* generate 2 extensions, id 33 and 100 */
|
||||
len = opus_packet_extensions_generate(&packet[4], sizeof(packet)-4, ext, 2, 0);
|
||||
/* update the padding length */
|
||||
packet[2] = len;
|
||||
|
||||
/* concatenate 3 frames */
|
||||
res = opus_repacketizer_cat(&rp, packet, 4+len);
|
||||
/* for the middle frame, no padding, no extensions */
|
||||
packet[1] = 1;
|
||||
res = opus_repacketizer_cat(&rp, packet, 4);
|
||||
/* switch back to extensions for the last frame extensions */
|
||||
packet[1] = 1 << 6 | 1;
|
||||
res = opus_repacketizer_cat(&rp, packet, 4+len);
|
||||
|
||||
expect_true(rp.nb_frames == 3, "Expected 3 frames");
|
||||
res = opus_repacketizer_out_range_impl(&rp,
|
||||
0, 3, /* begin, end */
|
||||
packet_out, /* unsigned char *data */
|
||||
sizeof(packet_out), /* opus_int32 maxlen */
|
||||
0, /*int self_delimited */
|
||||
0, /* int pad */
|
||||
NULL, /* const opus_extension_data *extensions */
|
||||
0 /* int nb_extensions */);
|
||||
expect_true(res > 0, "expected valid packet length");
|
||||
|
||||
/* now verify that we have the expected extensions */
|
||||
res = opus_packet_parse_impl(packet_out, res, 0, NULL, NULL, size,
|
||||
NULL, NULL, &padding, &padding_len);
|
||||
nb_ext = 10;
|
||||
res = opus_packet_extensions_parse(padding, padding_len, ext_out, &nb_ext);
|
||||
expect_true(nb_ext == 4, "Expected 4 extensions");
|
||||
for (i = 0 ; i < nb_ext; i++)
|
||||
{
|
||||
if (ext_out[i].id == 33)
|
||||
{
|
||||
opus_test_assert(ext_out[i].len == ext[0].len);
|
||||
opus_test_assert(0 == memcmp(ext_out[i].data, ext[0].data, ext[0].len));
|
||||
first_count++;
|
||||
}
|
||||
else if (ext_out[i].id == 100)
|
||||
{
|
||||
opus_test_assert(ext_out[i].len == ext[1].len);
|
||||
opus_test_assert(0 == memcmp(ext_out[i].data, ext[1].data, ext[1].len));
|
||||
second_count++;
|
||||
}
|
||||
if (i < 2)
|
||||
opus_test_assert(ext_out[i].frame == 0)
|
||||
else
|
||||
opus_test_assert(ext_out[i].frame == 2)
|
||||
}
|
||||
opus_test_assert(first_count == 2);
|
||||
opus_test_assert(second_count == 2);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int env_used;
|
||||
char *env_seed;
|
||||
env_used=0;
|
||||
env_seed=getenv("SEED");
|
||||
if(argc>1)iseed=atoi(argv[1]);
|
||||
else if(env_seed)
|
||||
{
|
||||
iseed=atoi(env_seed);
|
||||
env_used=1;
|
||||
}
|
||||
else iseed=(opus_uint32)time(NULL)^(((opus_uint32)getpid()&65535)<<16);
|
||||
Rw=Rz=iseed;
|
||||
|
||||
fprintf(stderr,"Testing extensions. Random seed: %u (%.4X)\n", iseed, fast_rand() % 65535);
|
||||
if(env_used)fprintf(stderr," Random seed set from the environment (SEED=%s).\n", env_seed);
|
||||
|
||||
test_extensions_generate_success();
|
||||
test_extensions_generate_zero();
|
||||
test_extensions_generate_no_padding();
|
||||
test_extensions_generate_fail();
|
||||
test_extensions_parse_success();
|
||||
test_extensions_parse_zero();
|
||||
test_extensions_parse_fail();
|
||||
test_random_extensions_parse();
|
||||
test_opus_repacketizer_out_range_impl();
|
||||
fprintf(stderr,"Tests completed successfully.\n");
|
||||
return 0;
|
||||
}
|
||||
Loading…
Reference in New Issue