From 249056759d98dfddbad4e260721fab047e0b257b Mon Sep 17 00:00:00 2001 From: Lorenzo Miniero Date: Mon, 8 Apr 2024 18:14:01 +0200 Subject: [PATCH] Add optional RNNoise support to AudioBridge (#3185) --- conf/janus.plugin.audiobridge.jcfg.sample | 1 + configure.ac | 10 + src/Makefile.am | 11 +- src/plugins/audiobridge-deps/resample.c | 1239 +++++++++++++++++ .../audiobridge-deps/speex/speex_resampler.h | 343 +++++ src/plugins/janus_audiobridge.c | 386 ++++- 6 files changed, 1977 insertions(+), 13 deletions(-) create mode 100644 src/plugins/audiobridge-deps/resample.c create mode 100644 src/plugins/audiobridge-deps/speex/speex_resampler.h diff --git a/conf/janus.plugin.audiobridge.jcfg.sample b/conf/janus.plugin.audiobridge.jcfg.sample index cd98be559d..7340768ab3 100644 --- a/conf/janus.plugin.audiobridge.jcfg.sample +++ b/conf/janus.plugin.audiobridge.jcfg.sample @@ -12,6 +12,7 @@ # audio_level_average = 25 (average value of audio level, 127=muted, 0='too loud', default=25) # default_expectedloss = percent of packets we expect participants may miss, to help with FEC (default=0, max=20; automatically used for forwarders too) # default_bitrate = default bitrate in bps to use for the all participants (default=0, which means libopus decides; automatically used for forwarders too) +# denoise = true|false (whether denoising via RNNoise should be performed for each participant by default) # record = true|false (whether this room should be recorded, default=false) # record_file = "/path/to/recording.wav" (where to save the recording) # record_dir = "/path/to/" (path to save the recording to, makes record_file a relative path if provided) diff --git a/configure.ac b/configure.ac index 807f74ab42..60f036e007 100644 --- a/configure.ac +++ b/configure.ac @@ -819,6 +819,16 @@ PKG_CHECK_MODULES([OGG], AC_SUBST([OGG_CFLAGS]) AC_SUBST([OGG_LIBS]) +PKG_CHECK_MODULES([RNNOISE], + [rnnoise], + [ + AC_DEFINE(HAVE_RNNOISE) + ], + [ + ]) +AC_SUBST([RNNOISE_CFLAGS]) 
+AC_SUBST([RNNOISE_LIBS]) + PKG_CHECK_MODULES([LUA], [lua], [ diff --git a/src/Makefile.am b/src/Makefile.am index 67e5dff5d6..dd87a21c0c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -396,13 +396,14 @@ plugins_libadd = \ if ENABLE_PLUGIN_AUDIOBRIDGE plugin_LTLIBRARIES += plugins/libjanus_audiobridge.la +plugins_libjanus_audiobridge_la_SOURCES = plugins/janus_audiobridge.c plugins_libjanus_audiobridge_la_SOURCES = plugins/janus_audiobridge.c \ - plugins/audiobridge-deps/jitter.c plugins/audiobridge-deps/arch.h \ - plugins/audiobridge-deps/os_support.h plugins/audiobridge-deps/speex/speex_jitter.h \ + plugins/audiobridge-deps/jitter.c plugins/audiobridge-deps/resample.c plugins/audiobridge-deps/arch.h \ + plugins/audiobridge-deps/os_support.h plugins/audiobridge-deps/speex/speex_jitter.h plugins/audiobridge-deps/speex/speex_resampler.h \ plugins/audiobridge-deps/speex/speexdsp_types.h plugins/audiobridge-deps/speex/speexdsp_config_types.h -plugins_libjanus_audiobridge_la_CFLAGS = $(plugins_cflags) $(OPUS_CFLAGS) $(OGG_CFLAGS) $(LIBSRTP_CFLAGS) -plugins_libjanus_audiobridge_la_LDFLAGS = $(plugins_ldflags) $(OPUS_LDFLAGS) $(OPUS_LIBS) $(OGG_LDFLAGS) $(OGG_LIBS) -plugins_libjanus_audiobridge_la_LIBADD = $(plugins_libadd) $(OPUS_LIBADD) $(OGG_LIBADD) +plugins_libjanus_audiobridge_la_CFLAGS = $(plugins_cflags) $(OPUS_CFLAGS) $(OGG_CFLAGS) $(RNNOISE_CFLAGS) $(LIBSRTP_CFLAGS) +plugins_libjanus_audiobridge_la_LDFLAGS = $(plugins_ldflags) $(OPUS_LDFLAGS) $(OPUS_LIBS) $(OGG_LDFLAGS) $(OGG_LIBS) $(RNNOISE_LDFLAGS) $(RNNOISE_LIBS) +plugins_libjanus_audiobridge_la_LIBADD = $(plugins_libadd) $(OPUS_LIBADD) $(OGG_LIBADD) $(RNNOISE_LIBADD) conf_DATA += ../conf/janus.plugin.audiobridge.jcfg.sample EXTRA_DIST += ../conf/janus.plugin.audiobridge.jcfg.sample endif diff --git a/src/plugins/audiobridge-deps/resample.c b/src/plugins/audiobridge-deps/resample.c new file mode 100644 index 0000000000..4892967b6c --- /dev/null +++ b/src/plugins/audiobridge-deps/resample.c @@ -0,0 
+1,1239 @@ +/* Copyright (C) 2007-2008 Jean-Marc Valin + Copyright (C) 2008 Thorvald Natvig + + File: resample.c + Arbitrary resampling code + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + The design goals of this code are: + - Very fast algorithm + - SIMD-friendly algorithm + - Low memory requirement + - Good *perceptual* quality (and not best SNR) + + Warning: This resampler is relatively new. Although I think I got rid of + all the major bugs and I don't expect the API to change anymore, there + may be something I've missed. So use with caution. + + This algorithm is based on this original resampling algorithm: + Smith, Julius O. 
Digital Audio Resampling Home Page + Center for Computer Research in Music and Acoustics (CCRMA), + Stanford University, 2007. + Web published at https://ccrma.stanford.edu/~jos/resample/. + + There is one main difference, though. This resampler uses cubic + interpolation instead of linear interpolation in the above paper. This + makes the table much smaller and makes it possible to compute that table + on a per-stream basis. In turn, being able to tweak the table for each + stream makes it possible to both reduce complexity on simple ratios + (e.g. 2/3), and get rid of the rounding operations in the inner loop. + The latter both reduces CPU time and makes the algorithm more SIMD-friendly. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef OUTSIDE_SPEEX +#include +static void *speex_alloc(int size) {return calloc(size,1);} +static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);} +static void speex_free(void *ptr) {free(ptr);} +#ifndef EXPORT +#define EXPORT +#endif +#include "speex_resampler.h" +#include "arch.h" +#else /* OUTSIDE_SPEEX */ + +#include "speex/speex_resampler.h" +#include "arch.h" +#include "os_support.h" +#endif /* OUTSIDE_SPEEX */ + +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) +#define IMIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#ifdef USE_SSE +#include "resample_sse.h" +#endif + +#ifdef USE_NEON +#include "resample_neon.h" +#endif + +/* Number of elements to allocate on the stack */ +#ifdef VAR_ARRAYS +#define FIXED_STACK_ALLOC 8192 +#else +#define FIXED_STACK_ALLOC 1024 +#endif + +typedef int (*resampler_basic_func)(SpeexResamplerState *, spx_uint32_t , const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *); + +struct SpeexResamplerState_ { + spx_uint32_t in_rate; + spx_uint32_t out_rate; + spx_uint32_t num_rate; + spx_uint32_t den_rate; + + int quality; + spx_uint32_t nb_channels; + spx_uint32_t filt_len; + spx_uint32_t mem_alloc_size; + spx_uint32_t buffer_size; + int int_advance; + int frac_advance; + float cutoff; + spx_uint32_t oversample; + int initialised; + int started; + + /* These are per-channel */ + spx_int32_t *last_sample; + spx_uint32_t *samp_frac_num; + spx_uint32_t *magic_samples; + + spx_word16_t *mem; + spx_word16_t *sinc_table; + spx_uint32_t sinc_table_length; + resampler_basic_func resampler_ptr; + + int in_stride; + int out_stride; +} ; + +static const double kaiser12_table[68] = { + 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076, + 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014, + 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601, + 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014, + 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490, + 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546, + 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178, + 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947, + 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058, + 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438, + 0.00105297, 0.00069463, 
0.00043489, 0.00025272, 0.00013031, 0.0000527734, + 0.00001000, 0.00000000}; +/* +static const double kaiser12_table[36] = { + 0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741, + 0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762, + 0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274, + 0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466, + 0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291, + 0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000}; +*/ +static const double kaiser10_table[36] = { + 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446, + 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347, + 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962, + 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451, + 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739, + 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000}; + +static const double kaiser8_table[36] = { + 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200, + 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126, + 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272, + 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758, + 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490, + 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000}; + +static const double kaiser6_table[36] = { + 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003, + 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565, + 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561, + 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058, + 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600, + 0.05031820, 0.03607231, 
0.02432151, 0.01487334, 0.00752000, 0.00000000}; + +struct FuncDef { + const double *table; + int oversample; +}; + +static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64}; +#define KAISER12 (&kaiser12_funcdef) +static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32}; +#define KAISER10 (&kaiser10_funcdef) +static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32}; +#define KAISER8 (&kaiser8_funcdef) +static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32}; +#define KAISER6 (&kaiser6_funcdef) + +struct QualityMapping { + int base_length; + int oversample; + float downsample_bandwidth; + float upsample_bandwidth; + const struct FuncDef *window_func; +}; + + +/* This table maps conversion quality to internal parameters. There are two + reasons that explain why the up-sampling bandwidth is larger than the + down-sampling bandwidth: + 1) When up-sampling, we can assume that the spectrum is already attenuated + close to the Nyquist rate (from an A/D or a previous resampling filter) + 2) Any aliasing that occurs very close to the Nyquist rate will be masked + by the sinusoids/noise just below the Nyquist rate (guaranteed only for + up-sampling). 
+*/ +static const struct QualityMapping quality_map[11] = { + { 8, 4, 0.830f, 0.860f, KAISER6 }, /* Q0 */ + { 16, 4, 0.850f, 0.880f, KAISER6 }, /* Q1 */ + { 32, 4, 0.882f, 0.910f, KAISER6 }, /* Q2 */ /* 82.3% cutoff ( ~60 dB stop) 6 */ + { 48, 8, 0.895f, 0.917f, KAISER8 }, /* Q3 */ /* 84.9% cutoff ( ~80 dB stop) 8 */ + { 64, 8, 0.921f, 0.940f, KAISER8 }, /* Q4 */ /* 88.7% cutoff ( ~80 dB stop) 8 */ + { 80, 16, 0.922f, 0.940f, KAISER10}, /* Q5 */ /* 89.1% cutoff (~100 dB stop) 10 */ + { 96, 16, 0.940f, 0.945f, KAISER10}, /* Q6 */ /* 91.5% cutoff (~100 dB stop) 10 */ + {128, 16, 0.950f, 0.950f, KAISER10}, /* Q7 */ /* 93.1% cutoff (~100 dB stop) 10 */ + {160, 16, 0.960f, 0.960f, KAISER10}, /* Q8 */ /* 94.5% cutoff (~100 dB stop) 10 */ + {192, 32, 0.968f, 0.968f, KAISER12}, /* Q9 */ /* 95.5% cutoff (~100 dB stop) 10 */ + {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */ +}; +/*8,24,40,56,80,104,128,160,200,256,320*/ +static double compute_func(float x, const struct FuncDef *func) +{ + float y, frac; + double interp[4]; + int ind; + y = x*func->oversample; + ind = (int)floor(y); + frac = (y-ind); + /* CSE will handle the repeated powers */ + interp[3] = -0.1666666667*frac + 0.1666666667*(frac*frac*frac); + interp[2] = frac + 0.5*(frac*frac) - 0.5*(frac*frac*frac); + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac); + /* Just to make sure we don't have rounding problems */ + interp[1] = 1.f-interp[3]-interp[2]-interp[0]; + + /*sum = frac*accum[1] + (1-frac)*accum[2];*/ + return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3]; +} + +#if 0 +#include +int main(int argc, char **argv) +{ + int i; + for (i=0;i<256;i++) + { + printf ("%f\n", compute_func(i/256., KAISER12)); + } + return 0; +} +#endif + +#ifdef FIXED_POINT +/* The slow way of computing a sinc for the table. 
Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6f) + return WORD2INT(32768.*cutoff); + else if (fabs(x) > .5f*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return WORD2INT(32768.*cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func)); +} +#else +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6) + return cutoff; + else if (fabs(x) > .5*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func); +} +#endif + +#ifdef FIXED_POINT +static void cubic_coef(spx_word16_t x, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + spx_word16_t x2, x3; + x2 = MULT16_16_P15(x, x); + x3 = MULT16_16_P15(x, x2); + interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15); + interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1)); + interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15); + /* Just to make sure we don't have rounding problems */ + interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3]; + if (interp[2]<32767) + interp[2]+=1; +} +#else +static void cubic_coef(spx_word16_t frac, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. 
I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + interp[0] = -0.16667f*frac + 0.16667f*frac*frac*frac; + interp[1] = frac + 0.5f*frac*frac - 0.5f*frac*frac*frac; + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[3] = -0.33333f*frac + 0.5f*frac*frac - 0.16667f*frac*frac*frac; + /* Just to make sure we don't have rounding problems */ + interp[2] = 1.-interp[0]-interp[1]-interp[3]; +} +#endif + +static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +#ifndef OVERRIDE_INNER_PRODUCT_SINGLE + int j; + sum = 0; + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = 
st->samp_frac_num[channel_index]; + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + double sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *iptr = & in[last_sample]; + + const int offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + spx_word16_t interp[4]; + + +#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + int j; + spx_word32_t accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += 
MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); + sum = SATURATE32PSHR(sum, 15, 32767); +#else + cubic_coef(frac, interp); + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); +#endif + + out[out_stride * out_sample++] = sum; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *iptr = & in[last_sample]; + + const int offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + const spx_word16_t frac = 
((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + spx_word16_t interp[4]; + + +#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +#else + cubic_coef(frac, interp); + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); +#endif + + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +/* This resampler is used to produce zero output in situations where memory + for the filter could not be allocated. The expected numbers of input and + output samples are still processed so that callers failing to check error + codes are not surprised, possibly getting into infinite loops. 
*/ +static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + + (void)in; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + out[out_stride * out_sample++] = 0; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den) +{ + spx_uint32_t major = value / den; + spx_uint32_t remain = value % den; + /* TODO: Could use 64 bits operation to check for overflow. 
But only guaranteed in C99+ */ + if (remain > UINT32_MAX / num || major > UINT32_MAX / num + || major * num > UINT32_MAX - remain * num / den) + return RESAMPLER_ERR_OVERFLOW; + *result = remain * num / den + major * num; + return RESAMPLER_ERR_SUCCESS; +} + +static int update_filter(SpeexResamplerState *st) +{ + spx_uint32_t old_length = st->filt_len; + spx_uint32_t old_alloc_size = st->mem_alloc_size; + int use_direct; + spx_uint32_t min_sinc_table_length; + spx_uint32_t min_alloc_size; + + st->int_advance = st->num_rate/st->den_rate; + st->frac_advance = st->num_rate%st->den_rate; + st->oversample = quality_map[st->quality].oversample; + st->filt_len = quality_map[st->quality].base_length; + + if (st->num_rate > st->den_rate) + { + /* down-sampling */ + st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate; + if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS) + goto fail; + /* Round up to make sure we have a multiple of 8 for SSE */ + st->filt_len = ((st->filt_len-1)&(~0x7))+8; + if (2*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (4*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (8*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (16*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (st->oversample < 1) + st->oversample = 1; + } else { + /* up-sampling */ + st->cutoff = quality_map[st->quality].upsample_bandwidth; + } + +#ifdef RESAMPLE_FULL_SINC_TABLE + use_direct = 1; + if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len) + goto fail; +#else + /* Choose the resampling type that requires the least amount of memory */ + use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8 + && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len; +#endif + if (use_direct) + { + min_sinc_table_length = st->filt_len*st->den_rate; + } else { + if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len) + goto fail; + + 
min_sinc_table_length = st->filt_len*st->oversample+8; + } + if (st->sinc_table_length < min_sinc_table_length) + { + spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t)); + if (!sinc_table) + goto fail; + + st->sinc_table = sinc_table; + st->sinc_table_length = min_sinc_table_length; + } + if (use_direct) + { + spx_uint32_t i; + for (i=0;iden_rate;i++) + { + spx_uint32_t j; + for (j=0;jfilt_len;j++) + { + st->sinc_table[i*st->filt_len+j] = sinc(st->cutoff,((j-(spx_int32_t)st->filt_len/2+1)-((float)i)/st->den_rate), st->filt_len, quality_map[st->quality].window_func); + } + } +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_direct_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_direct_double; + else + st->resampler_ptr = resampler_basic_direct_single; +#endif + /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/ + } else { + spx_int32_t i; + for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++) + st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func); +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_interpolate_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_interpolate_double; + else + st->resampler_ptr = resampler_basic_interpolate_single; +#endif + /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/ + } + + /* Here's the place where we update the filter memory to take into account + the change in filter length. It's probably the messiest part of the code + due to handling of lots of corner cases. */ + + /* Adding buffer_size to filt_len won't overflow here because filt_len + could be multiplied by sizeof(spx_word16_t) above. 
*/ + min_alloc_size = st->filt_len-1 + st->buffer_size; + if (min_alloc_size > st->mem_alloc_size) + { + spx_word16_t *mem; + if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size) + goto fail; + else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem)))) + goto fail; + + st->mem = mem; + st->mem_alloc_size = min_alloc_size; + } + if (!st->started) + { + spx_uint32_t i; + for (i=0;inb_channels*st->mem_alloc_size;i++) + st->mem[i] = 0; + /*speex_warning("reinit filter");*/ + } else if (st->filt_len > old_length) + { + spx_uint32_t i; + /* Increase the filter length */ + /*speex_warning("increase filter size");*/ + for (i=st->nb_channels;i--;) + { + spx_uint32_t j; + spx_uint32_t olen = old_length; + /*if (st->magic_samples[i])*/ + { + /* Try and remove the magic samples as if nothing had happened */ + + /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */ + olen = old_length + 2*st->magic_samples[i]; + for (j=old_length-1+st->magic_samples[i];j--;) + st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j]; + for (j=0;jmagic_samples[i];j++) + st->mem[i*st->mem_alloc_size+j] = 0; + st->magic_samples[i] = 0; + } + if (st->filt_len > olen) + { + /* If the new filter length is still bigger than the "augmented" length */ + /* Copy data going backward */ + for (j=0;jmem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*st->mem_alloc_size+(olen-2-j)]; + /* Then put zeros for lack of anything better */ + for (;jfilt_len-1;j++) + st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0; + /* Adjust last_sample */ + st->last_sample[i] += (st->filt_len - olen)/2; + } else { + /* Put back some of the magic! 
*/ + st->magic_samples[i] = (olen - st->filt_len)/2; + for (j=0;jfilt_len-1+st->magic_samples[i];j++) + st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]]; + } + } + } else if (st->filt_len < old_length) + { + spx_uint32_t i; + /* Reduce filter length, this is a bit tricky. We need to store some of the memory as "magic" + samples so they can be used directly as input the next time(s) */ + for (i=0;inb_channels;i++) + { + spx_uint32_t j; + spx_uint32_t old_magic = st->magic_samples[i]; + st->magic_samples[i] = (old_length - st->filt_len)/2; + /* We must copy some of the memory that's no longer used */ + /* Copy data going backward */ + for (j=0;jfilt_len-1+st->magic_samples[i]+old_magic;j++) + st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]]; + st->magic_samples[i] += old_magic; + } + } + return RESAMPLER_ERR_SUCCESS; + +fail: + st->resampler_ptr = resampler_basic_zero; + /* st->mem may still contain consumed input samples for the filter. + Restore filt_len so that filt_len - 1 still points to the position after + the last of these samples. 
*/ + st->filt_len = old_length; + return RESAMPLER_ERR_ALLOC_FAILED; +} + +EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err); +} + +EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + SpeexResamplerState *st; + int filter_err; + + if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0) + { + if (err) + *err = RESAMPLER_ERR_INVALID_ARG; + return NULL; + } + st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState)); + if (!st) + { + if (err) + *err = RESAMPLER_ERR_ALLOC_FAILED; + return NULL; + } + st->initialised = 0; + st->started = 0; + st->in_rate = 0; + st->out_rate = 0; + st->num_rate = 0; + st->den_rate = 0; + st->quality = -1; + st->sinc_table_length = 0; + st->mem_alloc_size = 0; + st->filt_len = 0; + st->mem = 0; + st->resampler_ptr = 0; + + st->cutoff = 1.f; + st->nb_channels = nb_channels; + st->in_stride = 1; + st->out_stride = 1; + + st->buffer_size = 160; + + /* Per channel data */ + if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t)))) + goto fail; + if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t)))) + goto fail; + if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t)))) + goto fail; + + speex_resampler_set_quality(st, quality); + speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate); + + filter_err = update_filter(st); + if (filter_err == RESAMPLER_ERR_SUCCESS) + { + st->initialised = 1; + } else { + speex_resampler_destroy(st); + st = NULL; + } + if (err) + *err = filter_err; + + return st; + +fail: + if (err) + *err = RESAMPLER_ERR_ALLOC_FAILED; + 
speex_resampler_destroy(st); + return NULL; +} + +EXPORT void speex_resampler_destroy(SpeexResamplerState *st) +{ + speex_free(st->mem); + speex_free(st->sinc_table); + speex_free(st->last_sample); + speex_free(st->magic_samples); + speex_free(st->samp_frac_num); + speex_free(st); +} + +static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int j=0; + const int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; + spx_uint32_t ilen; + + st->started = 1; + + /* Call the right resampler through the function ptr */ + out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len); + + if (st->last_sample[channel_index] < (spx_int32_t)*in_len) + *in_len = st->last_sample[channel_index]; + *out_len = out_sample; + st->last_sample[channel_index] -= *in_len; + + ilen = *in_len; + + for(j=0;jmagic_samples[channel_index]; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; + const int N = st->filt_len; + + speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len); + + st->magic_samples[channel_index] -= tmp_in_len; + + /* If we couldn't process all "magic" input samples, save the rest for next time */ + if (st->magic_samples[channel_index]) + { + spx_uint32_t i; + for (i=0;imagic_samples[channel_index];i++) + mem[N-1+i]=mem[N-1+i+tmp_in_len]; + } + *out += out_len*st->out_stride; + return out_len; +} + +#ifdef FIXED_POINT +EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +#else +EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#endif +{ + spx_uint32_t j; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; + 
spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size; + const int filt_offs = st->filt_len - 1; + const spx_uint32_t xlen = st->mem_alloc_size - filt_offs; + const int istride = st->in_stride; + + if (st->magic_samples[channel_index]) + olen -= speex_resampler_magic(st, channel_index, &out, olen); + if (! st->magic_samples[channel_index]) { + while (ilen && olen) { + spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen; + spx_uint32_t ochunk = olen; + + if (in) { + for(j=0;jout_stride; + if (in) + in += ichunk * istride; + } + } + *in_len -= ilen; + *out_len -= olen; + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +#ifdef FIXED_POINT +EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#else +EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +#endif +{ + spx_uint32_t j; + const int istride_save = st->in_stride; + const int ostride_save = st->out_stride; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; + spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size; + const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1); +#ifdef VAR_ARRAYS + const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC; + spx_word16_t ystack[ylen]; +#else + const unsigned int ylen = FIXED_STACK_ALLOC; + spx_word16_t ystack[FIXED_STACK_ALLOC]; +#endif + + st->out_stride = 1; + + while (ilen && olen) { + spx_word16_t *y = ystack; + spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen; + spx_uint32_t ochunk = (olen > ylen) ? ylen : olen; + spx_uint32_t omagic = 0; + + if (st->magic_samples[channel_index]) { + omagic = speex_resampler_magic(st, channel_index, &y, ochunk); + ochunk -= omagic; + olen -= omagic; + } + if (! 
st->magic_samples[channel_index]) { + if (in) { + for(j=0;jfilt_len-1]=WORD2INT(in[j*istride_save]); +#else + x[j+st->filt_len-1]=in[j*istride_save]; +#endif + } else { + for(j=0;jfilt_len-1]=0; + } + + speex_resampler_process_native(st, channel_index, &ichunk, y, &ochunk); + } else { + ichunk = 0; + ochunk = 0; + } + + for (j=0;jout_stride = ostride_save; + *in_len -= ilen; + *out_len -= olen; + + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + spx_uint32_t bak_out_len = *out_len; + spx_uint32_t bak_in_len = *in_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;inb_channels;i++) + { + *out_len = bak_out_len; + *in_len = bak_in_len; + if (in != NULL) + speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len); + else + speex_resampler_process_float(st, i, NULL, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return st->resampler_ptr == resampler_basic_zero ? 
RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + spx_uint32_t bak_out_len = *out_len; + spx_uint32_t bak_in_len = *in_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;inb_channels;i++) + { + *out_len = bak_out_len; + *in_len = bak_in_len; + if (in != NULL) + speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len); + else + speex_resampler_process_int(st, i, NULL, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate); +} + +EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate) +{ + *in_rate = st->in_rate; + *out_rate = st->out_rate; +} + +static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b) +{ + while (b != 0) + { + spx_uint32_t temp = a; + + a = b; + b = temp % b; + } + return a; +} + +EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + spx_uint32_t fact; + spx_uint32_t old_den; + spx_uint32_t i; + + if (ratio_num == 0 || ratio_den == 0) + return RESAMPLER_ERR_INVALID_ARG; + + if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den) + return RESAMPLER_ERR_SUCCESS; + + old_den = st->den_rate; + st->in_rate = in_rate; + st->out_rate = out_rate; + st->num_rate = ratio_num; + 
st->den_rate = ratio_den; + + fact = compute_gcd(st->num_rate, st->den_rate); + + st->num_rate /= fact; + st->den_rate /= fact; + + if (old_den > 0) + { + for (i=0;inb_channels;i++) + { + if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) + return RESAMPLER_ERR_OVERFLOW; + /* Safety net */ + if (st->samp_frac_num[i] >= st->den_rate) + st->samp_frac_num[i] = st->den_rate-1; + } + } + + if (st->initialised) + return update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den) +{ + *ratio_num = st->num_rate; + *ratio_den = st->den_rate; +} + +EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality) +{ + if (quality > 10 || quality < 0) + return RESAMPLER_ERR_INVALID_ARG; + if (st->quality == quality) + return RESAMPLER_ERR_SUCCESS; + st->quality = quality; + if (st->initialised) + return update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality) +{ + *quality = st->quality; +} + +EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->in_stride = stride; +} + +EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->in_stride; +} + +EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->out_stride = stride; +} + +EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->out_stride; +} + +EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st) +{ + return st->filt_len / 2; +} + +EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st) +{ + return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate; +} + +EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st) 
+{ + spx_uint32_t i; + for (i=0;inb_channels;i++) + st->last_sample[i] = st->filt_len/2; + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st) +{ + spx_uint32_t i; + for (i=0;inb_channels;i++) + { + st->last_sample[i] = 0; + st->magic_samples[i] = 0; + st->samp_frac_num[i] = 0; + } + for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT const char *speex_resampler_strerror(int err) +{ + switch (err) + { + case RESAMPLER_ERR_SUCCESS: + return "Success."; + case RESAMPLER_ERR_ALLOC_FAILED: + return "Memory allocation failed."; + case RESAMPLER_ERR_BAD_STATE: + return "Bad resampler state."; + case RESAMPLER_ERR_INVALID_ARG: + return "Invalid argument."; + case RESAMPLER_ERR_PTR_OVERLAP: + return "Input and output buffers overlap."; + default: + return "Unknown error. Bad error code or strange version mismatch."; + } +} diff --git a/src/plugins/audiobridge-deps/speex/speex_resampler.h b/src/plugins/audiobridge-deps/speex/speex_resampler.h new file mode 100644 index 0000000000..901de37b3d --- /dev/null +++ b/src/plugins/audiobridge-deps/speex/speex_resampler.h @@ -0,0 +1,343 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: speex_resampler.h + Resampling code + + The design goals of this code are: + - Very fast algorithm + - Low memory requirement + - Good *perceptual* quality (and not best SNR) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef SPEEX_RESAMPLER_H +#define SPEEX_RESAMPLER_H + +#ifdef OUTSIDE_SPEEX + +/********* WARNING: MENTAL SANITY ENDS HERE *************/ + +/* If the resampler is defined outside of Speex, we change the symbol names so that + there won't be any clash if linking with Speex later on. 
*/ + +/* #define RANDOM_PREFIX your software name here */ +#ifndef RANDOM_PREFIX +#error "Please define RANDOM_PREFIX (above) to something specific to your project to prevent symbol name clashes" +#endif + +#define CAT_PREFIX2(a,b) a ## b +#define CAT_PREFIX(a,b) CAT_PREFIX2(a, b) + +#define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init) +#define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac) +#define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy) +#define speex_resampler_process_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_float) +#define speex_resampler_process_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_int) +#define speex_resampler_process_interleaved_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_float) +#define speex_resampler_process_interleaved_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_int) +#define speex_resampler_set_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate) +#define speex_resampler_get_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_get_rate) +#define speex_resampler_set_rate_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate_frac) +#define speex_resampler_get_ratio CAT_PREFIX(RANDOM_PREFIX,_resampler_get_ratio) +#define speex_resampler_set_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_set_quality) +#define speex_resampler_get_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_get_quality) +#define speex_resampler_set_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_input_stride) +#define speex_resampler_get_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_stride) +#define speex_resampler_set_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_output_stride) +#define speex_resampler_get_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_stride) +#define speex_resampler_get_input_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_latency) +#define speex_resampler_get_output_latency 
CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_latency) +#define speex_resampler_skip_zeros CAT_PREFIX(RANDOM_PREFIX,_resampler_skip_zeros) +#define speex_resampler_reset_mem CAT_PREFIX(RANDOM_PREFIX,_resampler_reset_mem) +#define speex_resampler_strerror CAT_PREFIX(RANDOM_PREFIX,_resampler_strerror) + +#define spx_int16_t short +#define spx_int32_t int +#define spx_uint16_t unsigned short +#define spx_uint32_t unsigned int + +#define speex_assert(cond) + +#else /* OUTSIDE_SPEEX */ + +#include "speexdsp_types.h" + +#endif /* OUTSIDE_SPEEX */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define SPEEX_RESAMPLER_QUALITY_MAX 10 +#define SPEEX_RESAMPLER_QUALITY_MIN 0 +#define SPEEX_RESAMPLER_QUALITY_DEFAULT 4 +#define SPEEX_RESAMPLER_QUALITY_VOIP 3 +#define SPEEX_RESAMPLER_QUALITY_DESKTOP 5 + +enum { + RESAMPLER_ERR_SUCCESS = 0, + RESAMPLER_ERR_ALLOC_FAILED = 1, + RESAMPLER_ERR_BAD_STATE = 2, + RESAMPLER_ERR_INVALID_ARG = 3, + RESAMPLER_ERR_PTR_OVERLAP = 4, + RESAMPLER_ERR_OVERFLOW = 5, + + RESAMPLER_ERR_MAX_ERROR +}; + +struct SpeexResamplerState_; +typedef struct SpeexResamplerState_ SpeexResamplerState; + +/** Create a new resampler with integer input and output rates. + * @param nb_channels Number of channels to be processed + * @param in_rate Input sampling rate (integer number of Hz). + * @param out_rate Output sampling rate (integer number of Hz). + * @param quality Resampling quality between 0 and 10, where 0 has poor quality + * and 10 has very high quality. + * @return Newly created resampler state + * @retval NULL Error: not enough memory + */ +SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, + spx_uint32_t in_rate, + spx_uint32_t out_rate, + int quality, + int *err); + +/** Create a new resampler with fractional input/output rates. The sampling + * rate ratio is an arbitrary rational number with both the numerator and + * denominator being 32-bit integers. 
+ * @param nb_channels Number of channels to be processed + * @param ratio_num Numerator of the sampling rate ratio + * @param ratio_den Denominator of the sampling rate ratio + * @param in_rate Input sampling rate rounded to the nearest integer (in Hz). + * @param out_rate Output sampling rate rounded to the nearest integer (in Hz). + * @param quality Resampling quality between 0 and 10, where 0 has poor quality + * and 10 has very high quality. + * @return Newly created resampler state + * @retval NULL Error: invalid argument or not enough memory + */ +SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, + spx_uint32_t ratio_num, + spx_uint32_t ratio_den, + spx_uint32_t in_rate, + spx_uint32_t out_rate, + int quality, + int *err); + +/** Destroy a resampler state. + * @param st Resampler state + */ +void speex_resampler_destroy(SpeexResamplerState *st); + +/** Resample a float array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param channel_index Index of the channel to process for the multi-channel + * case (0 otherwise) + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the + * number of samples processed + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written + */ +int speex_resampler_process_float(SpeexResamplerState *st, + spx_uint32_t channel_index, + const float *in, + spx_uint32_t *in_len, + float *out, + spx_uint32_t *out_len); + +/** Resample an int array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param channel_index Index of the channel to process for the multi-channel + * case (0 otherwise) + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed + * @param out Output buffer + * @param out_len Size of the output buffer. 
Returns the number of samples written + */ +int speex_resampler_process_int(SpeexResamplerState *st, + spx_uint32_t channel_index, + const spx_int16_t *in, + spx_uint32_t *in_len, + spx_int16_t *out, + spx_uint32_t *out_len); + +/** Resample an interleaved float array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed. This is all per-channel. + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written. + * This is all per-channel. + */ +int speex_resampler_process_interleaved_float(SpeexResamplerState *st, + const float *in, + spx_uint32_t *in_len, + float *out, + spx_uint32_t *out_len); + +/** Resample an interleaved int array. The input and output buffers must *not* overlap. + * @param st Resampler state + * @param in Input buffer + * @param in_len Number of input samples in the input buffer. Returns the number + * of samples processed. This is all per-channel. + * @param out Output buffer + * @param out_len Size of the output buffer. Returns the number of samples written. + * This is all per-channel. + */ +int speex_resampler_process_interleaved_int(SpeexResamplerState *st, + const spx_int16_t *in, + spx_uint32_t *in_len, + spx_int16_t *out, + spx_uint32_t *out_len); + +/** Set (change) the input/output sampling rates (integer value). + * @param st Resampler state + * @param in_rate Input sampling rate (integer number of Hz). + * @param out_rate Output sampling rate (integer number of Hz). + */ +int speex_resampler_set_rate(SpeexResamplerState *st, + spx_uint32_t in_rate, + spx_uint32_t out_rate); + +/** Get the current input/output sampling rates (integer value). + * @param st Resampler state + * @param in_rate Input sampling rate (integer number of Hz) copied. + * @param out_rate Output sampling rate (integer number of Hz) copied. 
+ */ +void speex_resampler_get_rate(SpeexResamplerState *st, + spx_uint32_t *in_rate, + spx_uint32_t *out_rate); + +/** Set (change) the input/output sampling rates and resampling ratio + * (fractional values in Hz supported). + * @param st Resampler state + * @param ratio_num Numerator of the sampling rate ratio + * @param ratio_den Denominator of the sampling rate ratio + * @param in_rate Input sampling rate rounded to the nearest integer (in Hz). + * @param out_rate Output sampling rate rounded to the nearest integer (in Hz). + */ +int speex_resampler_set_rate_frac(SpeexResamplerState *st, + spx_uint32_t ratio_num, + spx_uint32_t ratio_den, + spx_uint32_t in_rate, + spx_uint32_t out_rate); + +/** Get the current resampling ratio. This will be reduced to the least + * common denominator. + * @param st Resampler state + * @param ratio_num Numerator of the sampling rate ratio copied + * @param ratio_den Denominator of the sampling rate ratio copied + */ +void speex_resampler_get_ratio(SpeexResamplerState *st, + spx_uint32_t *ratio_num, + spx_uint32_t *ratio_den); + +/** Set (change) the conversion quality. + * @param st Resampler state + * @param quality Resampling quality between 0 and 10, where 0 has poor + * quality and 10 has very high quality. + */ +int speex_resampler_set_quality(SpeexResamplerState *st, + int quality); + +/** Get the conversion quality. + * @param st Resampler state + * @param quality Resampling quality between 0 and 10, where 0 has poor + * quality and 10 has very high quality. + */ +void speex_resampler_get_quality(SpeexResamplerState *st, + int *quality); + +/** Set (change) the input stride. + * @param st Resampler state + * @param stride Input stride + */ +void speex_resampler_set_input_stride(SpeexResamplerState *st, + spx_uint32_t stride); + +/** Get the input stride. 
+ * @param st Resampler state + * @param stride Input stride copied + */ +void speex_resampler_get_input_stride(SpeexResamplerState *st, + spx_uint32_t *stride); + +/** Set (change) the output stride. + * @param st Resampler state + * @param stride Output stride + */ +void speex_resampler_set_output_stride(SpeexResamplerState *st, + spx_uint32_t stride); + +/** Get the output stride. + * @param st Resampler state + * @param stride Output stride copied + */ +void speex_resampler_get_output_stride(SpeexResamplerState *st, + spx_uint32_t *stride); + +/** Get the latency introduced by the resampler measured in input samples. + * @param st Resampler state + */ +int speex_resampler_get_input_latency(SpeexResamplerState *st); + +/** Get the latency introduced by the resampler measured in output samples. + * @param st Resampler state + */ +int speex_resampler_get_output_latency(SpeexResamplerState *st); + +/** Make sure that the first samples to go out of the resamplers don't have + * leading zeros. This is only useful before starting to use a newly created + * resampler. It is recommended to use that when resampling an audio file, as + * it will generate a file with the same length. For real-time processing, + * it is probably easier not to use this call (so that the output duration + * is the same for the first frame). + * @param st Resampler state + */ +int speex_resampler_skip_zeros(SpeexResamplerState *st); + +/** Reset a resampler so a new (unrelated) stream can be processed. 
+ * @param st Resampler state + */ +int speex_resampler_reset_mem(SpeexResamplerState *st); + +/** Returns the English meaning for an error code + * @param err Error code + * @return English string + */ +const char *speex_resampler_strerror(int err); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/plugins/janus_audiobridge.c b/src/plugins/janus_audiobridge.c index e416e664fc..5e993c6e80 100644 --- a/src/plugins/janus_audiobridge.c +++ b/src/plugins/janus_audiobridge.c @@ -40,6 +40,7 @@ room-: { audio_level_average = 25 (average value of audio level, 127=muted, 0='too loud', default=25) default_expectedloss = percent of packets we expect participants may miss, to help with FEC (default=0, max=20; automatically used for forwarders too) default_bitrate = default bitrate in bps to use for the all participants (default=0, which means libopus decides; automatically used for forwarders too) + denoise = true|false (whether denoising via RNNoise should be performed for each participant by default) record = true|false (whether this room should be recorded, default=false) record_file = /path/to/recording.wav (where to save the recording) record_dir = /path/to/ (path to save the recording to, makes record_file a relative path if provided) @@ -142,6 +143,7 @@ room-: { "audio_level_average" : , "default_expectedloss" : , "default_bitrate" : , + "denoise" : , "record" : , "record_file" : "", "record_dir" : "", @@ -839,6 +841,7 @@ room-: { "expected_loss" : <0-20, a percentage of the expected loss (capped at 20%), only needed in case FEC is used; optional, default is 0 (FEC disabled even when negotiated) or the room default>, "volume" : 100 increases volume; optional, default is 100 (no volume change)>, "spatial_position" : , + "denoise" : , "secret" : "", "audio_level_average" : "", "audio_active_packets" : "", @@ -926,7 +929,8 @@ room-: { "expected_loss" : "volume" : , "spatial_position" : , - "record": , + "denoise" : , + "record": ", "group" : "" } @@ -1037,7 
+1041,10 @@ room-: { "pause_events" : "bitrate" : , "quality" : <0-10, Opus-related complexity to use, higher is higher quality; optional, default is 4>, - "expected_loss" : <0-20, a percentage of the expected loss (capped at 20%), only needed in case FEC is used; optional, default is 0 (FEC disabled even when negotiated) or the room default> + "expected_loss" : <0-20, a percentage of the expected loss (capped at 20%), only needed in case FEC is used; optional, default is 0 (FEC disabled even when negotiated) or the room default>, + "volume" : , + "spatial_position" : , + "denoise" : } \endverbatim * @@ -1132,6 +1139,10 @@ room-: { /* We ship our own version of the libspeex-dsp jitter buffer, since * the one available out of the box comes with a nasty memory leak */ #include "audiobridge-deps/speex/speex_jitter.h" +#include "audiobridge-deps/speex/speex_resampler.h" +#ifdef HAVE_RNNOISE +#include +#endif #include #include @@ -1155,8 +1166,8 @@ room-: { /* Plugin information */ -#define JANUS_AUDIOBRIDGE_VERSION 12 -#define JANUS_AUDIOBRIDGE_VERSION_STRING "0.0.12" +#define JANUS_AUDIOBRIDGE_VERSION 13 +#define JANUS_AUDIOBRIDGE_VERSION_STRING "0.0.13" #define JANUS_AUDIOBRIDGE_DESCRIPTION "This is a plugin implementing an audio conference bridge for Janus, mixing Opus streams." #define JANUS_AUDIOBRIDGE_NAME "JANUS AudioBridge plugin" #define JANUS_AUDIOBRIDGE_AUTHOR "Meetecho s.r.l." 
@@ -1275,6 +1286,7 @@ static struct janus_json_parameter create_parameters[] = { {"audio_level_average", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, {"default_expectedloss", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, {"default_bitrate", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, + {"denoise", JANUS_JSON_BOOL, 0}, {"groups", JSON_ARRAY, 0} }; static struct janus_json_parameter edit_parameters[] = { @@ -1313,6 +1325,7 @@ static struct janus_json_parameter join_parameters[] = { {"spatial_position", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, {"audio_level_average", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, {"audio_active_packets", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, + {"denoise", JANUS_JSON_BOOL, 0}, {"record", JANUS_JSON_BOOL, 0}, {"filename", JSON_STRING, 0}, {"generate_offer", JANUS_JSON_BOOL, 0}, @@ -1343,6 +1356,7 @@ static struct janus_json_parameter configure_parameters[] = { {"volume", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, {"group", JSON_STRING, 0}, {"spatial_position", JSON_INTEGER, JANUS_JSON_PARAM_POSITIVE}, + {"denoise", JANUS_JSON_BOOL, 0}, {"record", JANUS_JSON_BOOL, 0}, {"filename", JSON_STRING, 0}, {"display", JSON_STRING, 0}, @@ -1439,6 +1453,9 @@ typedef struct janus_audiobridge_room { int32_t default_bitrate; /* Default bitrate to use for all Opus streams when encoding */ int audio_active_packets; /* Amount of packets with audio level for checkup */ int audio_level_average; /* Average audio level */ +#ifdef HAVE_RNNOISE + gboolean denoise; /* Whether we should denoise participants by default */ +#endif volatile gint record; /* Whether this room has to be recorded or not */ gchar *record_file; /* Path of the recording file (absolute or relative, depending on record_dir) */ gchar *record_dir; /* Folder to save the recording file to */ @@ -1673,6 +1690,18 @@ typedef struct janus_audiobridge_participant { int opus_complexity; /* Complexity to use in the encoder (by default, DEFAULT_COMPLEXITY) */ gboolean stereo; /* Whether stereo will be used for 
spatial audio */ int spatial_position; /* Panning of this participant in the mix */ +#ifdef HAVE_RNNOISE +#define DENOISER_FRAME_SIZE 480 + gboolean denoise; /* Whether we should denoise this participant */ + DenoiseState *rnnoise[2]; /* RNNoise states (we'll need two for stereo) */ + uint32_t resampler_rate; /* Sampling rate of the resamplers */ + gboolean resampler_stereo; /* Whether the current resamplers are stereo */ + SpeexResamplerState *upsampler; /* Speex upsampler used for denoising */ + SpeexResamplerState *downsampler; /* Speex downsampler used for denoising*/ + opus_int16 *upsample_buffer; /* Buffer for upsampling */ + opus_int16 *downsample_buffer; /* Buffer for downsampling */ + float *denoiser_buffer[2]; /* Buffer for denoising */ +#endif /* RTP stuff */ JitterBuffer *jitter; /* Jitter buffer of incoming audio packets */ gint64 jitter_next_check; /* Timestamp to perform next jitter buffer size check */ @@ -1694,6 +1723,7 @@ typedef struct janus_audiobridge_participant { janus_audiobridge_plainrtp_media plainrtp_media; janus_mutex pmutex; /* Opus stuff */ + uint32_t sampling_rate; /* Sampling rate to decode at */ OpusEncoder *encoder; /* Opus encoder instance */ OpusDecoder *decoder; /* Opus decoder instance */ gboolean fec; /* Opus FEC status */ @@ -1814,6 +1844,24 @@ static void janus_audiobridge_participant_free(const janus_refcount *participant janus_audiobridge_participant_clear_outbuf(participant); g_async_queue_unref(participant->outbuf); } +#ifdef HAVE_RNNOISE + if(participant->rnnoise[0]) + rnnoise_destroy(participant->rnnoise[0]); + if(participant->rnnoise[1]) + rnnoise_destroy(participant->rnnoise[1]); + if(participant->denoiser_buffer[0]) + g_free(participant->denoiser_buffer[0]); + if(participant->denoiser_buffer[1]) + g_free(participant->denoiser_buffer[1]); + if(participant->upsampler) + speex_resampler_destroy(participant->upsampler); + if(participant->downsampler) + speex_resampler_destroy(participant->downsampler); + 
if(participant->upsample_buffer) + g_free(participant->upsample_buffer); + if(participant->downsample_buffer) + g_free(participant->downsample_buffer); +#endif g_free(participant->mjr_base); #ifdef HAVE_LIBOGG janus_audiobridge_file_free(participant->annc); @@ -1824,6 +1872,12 @@ static void janus_audiobridge_participant_free(const janus_refcount *participant g_free(participant); } +#ifdef HAVE_RNNOISE +static void janus_audiobridge_participant_denoise(janus_audiobridge_participant *participant, char *data, int len); +static void janus_audiobridge_participant_upsample(janus_audiobridge_participant *participant, opus_int16 *input, int *in_len, opus_int16 *output, int *out_len); +static void janus_audiobridge_participant_downsample(janus_audiobridge_participant *participant, opus_int16 *input, int *in_len, opus_int16 *output, int *out_len); +#endif + static void janus_audiobridge_session_destroy(janus_audiobridge_session *session) { if(session && g_atomic_int_compare_and_exchange(&session->destroyed, 0, 1)) janus_refcount_decrease(&session->ref); @@ -2406,6 +2460,12 @@ int janus_audiobridge_init(janus_callbacks *callback, const char *config_path) { /* This is the callback we'll need to invoke to contact the Janus core */ gateway = callback; +#ifdef HAVE_RNNOISE + JANUS_LOG(LOG_INFO, "Denoising via RNNoise supported (%d)\n", rnnoise_get_frame_size()); +#else + JANUS_LOG(LOG_WARN, "Denoising via RNNoise NOT supported\n"); +#endif + /* Parse configuration to populate the rooms list */ if(config != NULL) { janus_config_category *config_general = janus_config_get_create(config, NULL, janus_config_type_category, "general"); @@ -2520,6 +2580,7 @@ int janus_audiobridge_init(janus_callbacks *callback, const char *config_path) { janus_config_item *audio_level_average = janus_config_get(config, cat, janus_config_type_item, "audio_level_average"); janus_config_item *default_expectedloss = janus_config_get(config, cat, janus_config_type_item, "default_expectedloss"); 
janus_config_item *default_bitrate = janus_config_get(config, cat, janus_config_type_item, "default_bitrate"); + janus_config_item *denoise = janus_config_get(config, cat, janus_config_type_item, "denoise"); janus_config_item *secret = janus_config_get(config, cat, janus_config_type_item, "secret"); janus_config_item *pin = janus_config_get(config, cat, janus_config_type_item, "pin"); janus_config_array *groups = janus_config_get(config, cat, janus_config_type_array, "groups"); @@ -2634,6 +2695,13 @@ int janus_audiobridge_init(janus_callbacks *callback, const char *config_path) { audiobridge->default_bitrate = 0; } } +#ifdef HAVE_RNNOISE + audiobridge->denoise = denoise && denoise->value && janus_is_true(denoise->value); +#else + if(denoise && denoise->value && janus_is_true(denoise->value)) { + JANUS_LOG(LOG_WARN, "RNNoise unavailable, denoising not supported\n"); + } +#endif audiobridge->room_ssrc = janus_random_uint32(); if(secret != NULL && secret->value != NULL) { audiobridge->room_secret = g_strdup(secret->value); @@ -2960,6 +3028,9 @@ json_t *janus_audiobridge_query_session(janus_plugin_session *handle) { json_object_set_new(info, "queue-out", json_integer(g_async_queue_length(participant->outbuf))); if(participant->stereo) json_object_set_new(info, "spatial_position", json_integer(participant->spatial_position)); +#ifdef HAVE_RNNOISE + json_object_set_new(info, "denoise", participant->denoise ? 
json_true() : json_false()); +#endif if(participant->arc && participant->arc->filename) json_object_set_new(info, "audio-recording", json_string(participant->arc->filename)); if(participant->extmap_id > 0) { @@ -3097,6 +3168,7 @@ static json_t *janus_audiobridge_process_synchronous_request(janus_audiobridge_s json_t *audio_level_average = json_object_get(root, "audio_level_average"); json_t *default_expectedloss = json_object_get(root, "default_expectedloss"); json_t *default_bitrate = json_object_get(root, "default_bitrate"); + json_t *denoise = json_object_get(root, "denoise"); json_t *groups = json_object_get(root, "groups"); json_t *record = json_object_get(root, "record"); json_t *recfile = json_object_get(root, "record_file"); @@ -3264,6 +3336,13 @@ static json_t *janus_audiobridge_process_synchronous_request(janus_audiobridge_s audiobridge->default_bitrate = 0; } } +#ifdef HAVE_RNNOISE + audiobridge->denoise = denoise ? json_is_true(denoise) : FALSE; +#else + if(denoise && json_is_true(denoise)) { + JANUS_LOG(LOG_WARN, "RNNoise unavailable, denoising not supported\n"); + } +#endif switch(audiobridge->sampling_rate) { case 8000: case 12000: @@ -4344,6 +4423,104 @@ static json_t *janus_audiobridge_process_synchronous_request(janus_audiobridge_s janus_mutex_unlock(&audiobridge->mutex); janus_refcount_decrease(&audiobridge->ref); goto prepare_response; +#ifdef HAVE_RNNOISE + } else if(!strcasecmp(request_text, "denoise_enable") || !strcasecmp(request_text, "denoise_disable")) { + gboolean denoise = (!strcasecmp(request_text, "denoise_enable")); + JANUS_LOG(LOG_VERB, "Attempt to %s denoising for a participant in an existing AudioBridge room\n", + denoise ? 
"enable" : "disable"); + JANUS_VALIDATE_JSON_OBJECT(root, secret_parameters, + error_code, error_cause, TRUE, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT); + if(error_code != 0) + goto prepare_response; + if(!string_ids) { + JANUS_VALIDATE_JSON_OBJECT(root, room_parameters, + error_code, error_cause, TRUE, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT); + } else { + JANUS_VALIDATE_JSON_OBJECT(root, roomstr_parameters, + error_code, error_cause, TRUE, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT); + } + if(error_code != 0) + goto prepare_response; + if(!string_ids) { + JANUS_VALIDATE_JSON_OBJECT(root, id_parameters, + error_code, error_cause, TRUE, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT); + } else { + JANUS_VALIDATE_JSON_OBJECT(root, idstr_parameters, + error_code, error_cause, TRUE, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT); + } + if(error_code != 0) + goto prepare_response; + json_t *room = json_object_get(root, "room"); + json_t *id = json_object_get(root, "id"); + guint64 room_id = 0; + char room_id_num[30], *room_id_str = NULL; + if(!string_ids) { + room_id = json_integer_value(room); + g_snprintf(room_id_num, sizeof(room_id_num), "%"SCNu64, room_id); + room_id_str = room_id_num; + } else { + room_id_str = (char *)json_string_value(room); + } + janus_mutex_lock(&rooms_mutex); + janus_audiobridge_room *audiobridge = g_hash_table_lookup(rooms, + string_ids ? 
(gpointer)room_id_str : (gpointer)&room_id); + if(audiobridge == NULL) { + janus_mutex_unlock(&rooms_mutex); + error_code = JANUS_AUDIOBRIDGE_ERROR_NO_SUCH_ROOM; + JANUS_LOG(LOG_ERR, "No such room (%s)\n", room_id_str); + g_snprintf(error_cause, 512, "No such room (%s)", room_id_str); + goto prepare_response; + } + janus_refcount_increase(&audiobridge->ref); + janus_mutex_lock(&audiobridge->mutex); + janus_mutex_unlock(&rooms_mutex); + + /* A secret may be required for this action */ + JANUS_CHECK_SECRET(audiobridge->room_secret, root, "secret", error_code, error_cause, + JANUS_AUDIOBRIDGE_ERROR_MISSING_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_INVALID_ELEMENT, JANUS_AUDIOBRIDGE_ERROR_UNAUTHORIZED); + if(error_code != 0) { + janus_mutex_unlock(&audiobridge->mutex); + janus_refcount_decrease(&audiobridge->ref); + goto prepare_response; + } + + guint64 user_id = 0; + char user_id_num[30], *user_id_str = NULL; + if(!string_ids) { + user_id = json_integer_value(id); + g_snprintf(user_id_num, sizeof(user_id_num), "%"SCNu64, user_id); + user_id_str = user_id_num; + } else { + user_id_str = (char *)json_string_value(id); + } + janus_audiobridge_participant *participant = g_hash_table_lookup(audiobridge->participants, + string_ids ? (gpointer)user_id_str : (gpointer)&user_id); + if(participant == NULL) { + janus_mutex_unlock(&audiobridge->mutex); + janus_refcount_decrease(&audiobridge->ref); + JANUS_LOG(LOG_ERR, "No such user %s in room %s\n", user_id_str, room_id_str); + error_code = JANUS_AUDIOBRIDGE_ERROR_NO_SUCH_USER; + g_snprintf(error_cause, 512, "No such user %s in room %s", user_id_str, room_id_str); + goto prepare_response; + } + + participant->denoise = denoise; + + /* Prepare response */ + response = json_object(); + json_object_set_new(response, "audiobridge", json_string("success")); + json_object_set_new(response, "room", string_ids ? 
json_string(room_id_str) : json_integer(room_id)); + + /* Done */ + janus_mutex_unlock(&audiobridge->mutex); + janus_refcount_decrease(&audiobridge->ref); + goto prepare_response; +#endif } else if(!strcasecmp(request_text, "kick")) { JANUS_LOG(LOG_VERB, "Attempt to kick a participant from an existing AudioBridge room\n"); JANUS_VALIDATE_JSON_OBJECT(root, secret_parameters, @@ -6306,6 +6483,7 @@ static void *janus_audiobridge_handler(void *data) { json_t *acodec = json_object_get(root, "codec"); json_t *user_audio_level_average = json_object_get(root, "audio_level_average"); json_t *user_audio_active_packets = json_object_get(root, "audio_active_packets"); + json_t *denoise = json_object_get(root, "denoise"); json_t *record = json_object_get(root, "record"); json_t *recfile = json_object_get(root, "filename"); json_t *gen_offer = json_object_get(root, "generate_offer"); @@ -6477,6 +6655,7 @@ static void *janus_audiobridge_handler(void *data) { /* Opus encoder */ int error = 0; if(participant->encoder == NULL) { + participant->sampling_rate = audiobridge->sampling_rate; participant->encoder = opus_encoder_create(audiobridge->sampling_rate, audiobridge->spatial_audio ? 2 : 1, OPUS_APPLICATION_VOIP, &error); if(error != OPUS_OK) { @@ -6542,6 +6721,13 @@ static void *janus_audiobridge_handler(void *data) { goto error; } } +#ifdef HAVE_RNNOISE + participant->denoise = denoise ? 
json_is_true(denoise) : audiobridge->denoise; +#else + if(denoise && json_is_true(denoise)) { + JANUS_LOG(LOG_WARN, "RNNoise unavailable, denoising not supported\n"); + } +#endif participant->reset = FALSE; /* If this is a plain RTP participant, create the socket */ if(rtp != NULL) { @@ -6793,6 +6979,7 @@ static void *janus_audiobridge_handler(void *data) { json_t *exploss = json_object_get(root, "expected_loss"); json_t *gain = json_object_get(root, "volume"); json_t *spatial = json_object_get(root, "spatial_position"); + json_t *denoise = json_object_get(root, "denoise"); json_t *record = json_object_get(root, "record"); json_t *recfile = json_object_get(root, "filename"); json_t *display = json_object_get(root, "display"); @@ -6846,7 +7033,7 @@ static void *janus_audiobridge_handler(void *data) { } participant->group = group_id; } - if(muted || display || (participant->stereo && spatial)) { + if(muted || display || (participant->stereo && spatial) || denoise) { if(muted) { participant->muted = json_is_true(muted); JANUS_LOG(LOG_VERB, "Setting muted property: %s (room %s, user %s)\n", @@ -6873,6 +7060,14 @@ static void *janus_audiobridge_handler(void *data) { spatial_position = 100; participant->spatial_position = spatial_position; } +#ifdef HAVE_RNNOISE + if(denoise) + participant->denoise = json_is_true(denoise); +#else + if(denoise && json_is_true(denoise)) { + JANUS_LOG(LOG_WARN, "RNNoise unavailable, denoising not supported\n"); + } +#endif /* Notify all other participants */ janus_mutex_lock(&rooms_mutex); janus_audiobridge_room *audiobridge = participant->room; @@ -7102,8 +7297,9 @@ static void *janus_audiobridge_handler(void *data) { json_t *bitrate = json_object_get(root, "bitrate"); json_t *quality = json_object_get(root, "quality"); json_t *exploss = json_object_get(root, "expected_loss"); + json_t *denoise = json_object_get(root, "denoise"); int volume = gain ? json_integer_value(gain) : 100; - int spatial_position = spatial ? 
json_integer_value(spatial) : 64; + int spatial_position = spatial ? json_integer_value(spatial) : 50; int32_t opus_bitrate = audiobridge->default_bitrate; if(bitrate) { opus_bitrate = json_integer_value(bitrate); @@ -7197,8 +7393,6 @@ static void *janus_audiobridge_handler(void *data) { if(old_audiobridge->sampling_rate != audiobridge->sampling_rate || old_audiobridge->spatial_audio != audiobridge->spatial_audio) { /* Create a new one that takes into account the sampling rate we want now */ - participant->stereo = audiobridge->spatial_audio; - participant->spatial_position = 50; int error = 0; OpusEncoder *new_encoder = opus_encoder_create(audiobridge->sampling_rate, audiobridge->spatial_audio ? 2 : 1, OPUS_APPLICATION_VOIP, &error); @@ -7269,6 +7463,7 @@ static void *janus_audiobridge_handler(void *data) { g_usleep(5000); if(participant->encoder) opus_encoder_destroy(participant->encoder); + participant->sampling_rate = audiobridge->sampling_rate; participant->encoder = new_encoder; g_atomic_int_set(&participant->encoding, 0); while(!g_atomic_int_compare_and_exchange(&participant->decoding, 0, 1)) @@ -7358,6 +7553,14 @@ static void *janus_audiobridge_handler(void *data) { participant->expected_loss = expected_loss; opus_encoder_ctl(participant->encoder, OPUS_SET_PACKET_LOSS_PERC(participant->expected_loss)); } +#ifdef HAVE_RNNOISE + /* Check if a denoiser is needed now */ + participant->denoise = denoise ? json_is_true(denoise) : audiobridge->denoise; +#else + if(denoise && json_is_true(denoise)) { + JANUS_LOG(LOG_WARN, "RNNoise unavailable, denoising not supported\n"); + } +#endif g_hash_table_insert(audiobridge->participants, string_ids ? 
(gpointer)g_strdup(participant->user_id_str) : (gpointer)janus_uint64_dup(participant->user_id), participant); @@ -8622,6 +8825,11 @@ static void *janus_audiobridge_participant_thread(void *data) { int32_t output_samples; opus_decoder_ctl(participant->decoder, OPUS_GET_LAST_PACKET_DURATION(&output_samples)); pkt->length = opus_decode(participant->decoder, payload, plen, (opus_int16 *)pkt->data, output_samples, 1); +#ifdef HAVE_RNNOISE + /* Check if we need to denoise this packet */ + if(participant->denoise) + janus_audiobridge_participant_denoise(participant, (char *)pkt->data, pkt->length); +#endif /* Queue the decoded redundant packet for the mixer */ janus_mutex_lock(&participant->qmutex); participant->inbuf = g_list_append(participant->inbuf, pkt); @@ -8662,6 +8870,11 @@ static void *janus_audiobridge_participant_thread(void *data) { } pkt->length = 320; } +#ifdef HAVE_RNNOISE + /* Check if we need to denoise this packet */ + if(participant->denoise) + janus_audiobridge_participant_denoise(participant, (char *)pkt->data, pkt->length); +#endif /* Get rid of the buffered packet */ janus_audiobridge_buffer_packet_destroy(bpkt); /* Update the details */ @@ -9093,3 +9306,160 @@ static void janus_audiobridge_participant_istalking(janus_audiobridge_session *s } } } + +#ifdef HAVE_RNNOISE +static void janus_audiobridge_participant_denoise(janus_audiobridge_participant *participant, char *data, int len) { + if(len < 0 || data == NULL) + return; + /* Create a denoiser if we still don't have one */ + if(participant->rnnoise[0] == NULL) { + /* Create RNNoise context */ + participant->rnnoise[0] = rnnoise_create(NULL); + /* If we still don't have a denoiser, give up */ + if(participant->rnnoise[0] == NULL) + return; + /* Allocate the buffer for the denoiser */ + if(participant->denoiser_buffer[0] == NULL) + participant->denoiser_buffer[0] = g_malloc(DENOISER_FRAME_SIZE * sizeof(float)); + } + /* Check if we need a denoiser for stereo channel too */ + if(participant->stereo 
&& participant->rnnoise[1] == NULL) { + /* Create RNNoise context */ + participant->rnnoise[1] = rnnoise_create(NULL); + /* If we still don't have a denoiser, give up */ + if(participant->rnnoise[1] == NULL) + return; + /* Allocate the buffer for the denoiser */ + if(participant->denoiser_buffer[1] == NULL) + participant->denoiser_buffer[1] = g_malloc(DENOISER_FRAME_SIZE * sizeof(float)); + } + /* Check if we need to (re)create resamplers too */ + if(participant->sampling_rate != participant->resampler_rate || + participant->stereo != participant->resampler_stereo) { + participant->resampler_rate = participant->sampling_rate; + participant->resampler_stereo = participant->stereo; + if(participant->upsampler) + speex_resampler_destroy(participant->upsampler); + participant->upsampler = NULL; + if(participant->downsampler) + speex_resampler_destroy(participant->downsampler); + participant->downsampler = NULL; + /* We need resamplers only if rate is not 48kHz */ + if(participant->resampler_rate != 48000) { + spx_uint32_t channels = !participant->resampler_stereo ? 1 : 2; + spx_uint32_t from_rate = participant->resampler_rate; + spx_uint32_t to_rate = 48000; + int quality = 8, error = 0; + participant->upsampler = speex_resampler_init(channels, from_rate, to_rate, quality, &error); + if(participant->upsampler != NULL) { + JANUS_LOG(LOG_INFO, "Created %s resampler from %d to %d (channels=%d, quality=%d)\n", + (participant->resampler_stereo ? "stereo" : "mono"), from_rate, to_rate, channels, quality); + } else { + /* We couldn't create a resampler, don't do anything */ + return; + } + participant->downsampler = speex_resampler_init(channels, to_rate, from_rate, quality, &error); + if(participant->downsampler != NULL) { + JANUS_LOG(LOG_INFO, "Created %s resampler from %d to %d (channels=%d, quality=%d)\n", + (participant->resampler_stereo ? 
"stereo" : "mono"), to_rate, from_rate, channels, quality); + } else { + /* We couldn't create a resampler, don't do anything */ + return; + } + if(participant->upsample_buffer == NULL) + participant->upsample_buffer = g_malloc(2 * OPUS_SAMPLES * sizeof(opus_int16)); + if(participant->downsample_buffer == NULL) + participant->downsample_buffer = g_malloc(2 * OPUS_SAMPLES * sizeof(opus_int16)); + } + } + + /* Opus int16 original samples */ + opus_int16 *samples = (opus_int16 *)data; + /* Number of original samples, should be: 160 (8kHz), 320 (16kHz), 480 (24kHz), 960 (48kHz) */ + int samples_count = len; + /* Actual length of the resampled array (double size for stereo) */ + const int samples_len = !participant->resampler_stereo ? samples_count : 2*samples_count; + + /* Should be 960 */ + int upsample_buffer_count = len * (48000/participant->resampler_rate); + /* Upsampled buffer */ + opus_int16 *upsample_buffer = samples; + + /* Downsampled data samples count is equal to original samples */ + int downsample_buffer_count = samples_count; + /* Downsampled buffer */ + opus_int16 *downsample_buffer = upsample_buffer; + + /* Upsample */ + if(participant->resampler_rate != 48000) { + upsample_buffer = participant->upsample_buffer; + janus_audiobridge_participant_upsample(participant, samples, &samples_count, upsample_buffer, &upsample_buffer_count); + } + + int i = 0, j = 0; + float *denoiser_buffer = participant->denoiser_buffer[0]; + float *denoiser_buffer_alt = participant->denoiser_buffer[1]; + + /* Denoise in chunks of 480 samples */ + if(!participant->resampler_stereo) { + for(i=0; irnnoise[0], denoiser_buffer, denoiser_buffer); + for(j=0; jrnnoise[0], denoiser_buffer, denoiser_buffer); + rnnoise_process_frame(participant->rnnoise[1], denoiser_buffer_alt, denoiser_buffer_alt); + for(j=0; jresampler_rate != 48000) { + downsample_buffer = participant->downsample_buffer; + janus_audiobridge_participant_downsample(participant, upsample_buffer, &upsample_buffer_count, 
downsample_buffer, &downsample_buffer_count); + } + + /* Copy denoised and downsampled data back */ + memcpy(samples, downsample_buffer, samples_len*sizeof(opus_int16)); +} + +static void janus_audiobridge_participant_upsample(janus_audiobridge_participant *participant, opus_int16 *input, int *in_len, opus_int16 *output, int *out_len) { + if(!participant->resampler_stereo) { + int err = speex_resampler_process_int(participant->upsampler, 0, (spx_int16_t *)input, (spx_uint32_t *)in_len, (spx_int16_t *)output, (spx_uint32_t *)out_len); + if(err != 0) { + //TODO + } + } else { + int err = speex_resampler_process_interleaved_int(participant->upsampler, (spx_int16_t *)input, (spx_uint32_t *)in_len, (spx_int16_t *)output, (spx_uint32_t *)out_len); + if(err != 0) { + //TODO + } + } +} +static void janus_audiobridge_participant_downsample(janus_audiobridge_participant *participant, opus_int16 *input, int *in_len, opus_int16 *output, int *out_len) { + if(!participant->resampler_stereo) { + int err = speex_resampler_process_int(participant->downsampler, 0, (spx_int16_t *)input, (spx_uint32_t *)in_len, (spx_int16_t *)output, (spx_uint32_t *)out_len); + if(err != 0) { + //TODO + } + } else { + int err = speex_resampler_process_interleaved_int(participant->downsampler, (spx_int16_t *)input, (spx_uint32_t *)in_len, (spx_int16_t *)output, (spx_uint32_t *)out_len); + if(err != 0) { + //TODO + } + } +} +#endif