This is an automated email from the git hooks/post-receive script.
richard pushed a commit to branch tor-browser-91.9esr-11.0-1 in repository tor-browser.
commit 2ccdca191a440f17990f9ab00680d034eb59c9ea Author: Tom Ritter <tom@mozilla.com> AuthorDate: Mon Apr 11 16:51:56 2022 +0000
Bug 1762614: Update libopus to 2654707e86cc94413998976d179b2ab4a2aa3114 r=kinetik,a=dsmith
Differential Revision: https://phabricator.services.mozilla.com/D142717 --- media/libopus/README_MOZILLA | 2 +- media/libopus/celt/arch.h | 5 +- media/libopus/celt/arm/armcpu.c | 2 + media/libopus/celt/bands.c | 11 +- media/libopus/celt/celt_decoder.c | 10 +- media/libopus/celt/celt_encoder.c | 4 +- media/libopus/celt/celt_lpc.c | 62 +- media/libopus/celt/ecintrin.h | 4 + media/libopus/celt/fixed_debug.h | 45 + media/libopus/celt/fixed_generic.h | 14 +- media/libopus/celt/float_cast.h | 58 +- media/libopus/celt/mathops.h | 4 +- media/libopus/celt/mips/celt_mipsr1.h | 1 + media/libopus/celt/mips/vq_mipsr1.h | 8 +- media/libopus/celt/os_support.h | 1 - media/libopus/celt/rate.c | 2 +- media/libopus/celt/rate.h | 2 +- media/libopus/celt/stack_alloc.h | 4 +- media/libopus/celt/vq.c | 4 + media/libopus/celt/vq.h | 4 - media/libopus/celt/x86/pitch_sse.h | 2 +- media/libopus/celt/x86/pitch_sse4_1.c | 51 +- media/libopus/celt/x86/x86cpu.h | 34 +- media/libopus/include/opus_custom.h | 4 +- media/libopus/include/opus_defines.h | 13 +- media/libopus/moz.build | 2 +- media/libopus/nonunified2.patch | 2 - media/libopus/silk/CNG.c | 4 + media/libopus/silk/LPC_fit.c | 3 +- media/libopus/silk/MacroCount.h | 2 +- media/libopus/silk/NSQ.c | 36 +- media/libopus/silk/NSQ_del_dec.c | 40 +- media/libopus/silk/PLC.c | 6 +- media/libopus/silk/SigProc_FIX.h | 6 +- media/libopus/silk/VAD.c | 5 +- media/libopus/silk/VQ_WMat_EC.c | 4 +- .../libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c | 22 +- media/libopus/silk/bwexpander_32.c | 3 +- media/libopus/silk/debug.c | 10 +- media/libopus/silk/debug.h | 25 +- media/libopus/silk/decode_frame.c | 1 - media/libopus/silk/define.h | 1 + .../arm/warped_autocorrelation_FIX_neon_intr.c | 9 +- media/libopus/silk/fixed/burg_modified_FIX.c | 8 +- .../fixed/mips/warped_autocorrelation_FIX_mipsr1.h | 7 +- media/libopus/silk/fixed/vector_ops_FIX.c | 2 +- .../silk/fixed/warped_autocorrelation_FIX.c | 2 + .../silk/fixed/x86/burg_modified_FIX_sse4_1.c | 69 +- 
.../libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c | 40 +- .../silk/float/warped_autocorrelation_FLP.c | 4 +- media/libopus/silk/main.h | 60 +- media/libopus/silk/typedef.h | 3 + media/libopus/silk/x86/NSQ_del_dec_sse4_1.c | 179 ++- media/libopus/silk/x86/NSQ_sse4_1.c | 211 ++-- media/libopus/silk/x86/SigProc_FIX_sse.h | 12 +- media/libopus/silk/x86/VAD_sse4_1.c | 28 +- media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c | 189 +-- media/libopus/silk/x86/main_sse.h | 170 ++- media/libopus/silk/x86/x86_silk_map.c | 89 +- media/libopus/src/analysis.c | 66 +- media/libopus/src/analysis.h | 1 + media/libopus/src/mapping_matrix.h | 2 +- media/libopus/src/mlp.c | 75 +- media/libopus/src/mlp_data.c | 1248 ++++++++++---------- media/libopus/src/opus.c | 2 +- media/libopus/src/opus_encoder.c | 142 ++- media/libopus/src/opus_multistream_decoder.c | 9 +- media/libopus/src/opus_multistream_encoder.c | 11 +- media/libopus/src/opus_private.h | 1 + 69 files changed, 1780 insertions(+), 1382 deletions(-)
diff --git a/media/libopus/README_MOZILLA b/media/libopus/README_MOZILLA index 76f12ab98afaf..d2c5c4920bc90 100644 --- a/media/libopus/README_MOZILLA +++ b/media/libopus/README_MOZILLA @@ -8,4 +8,4 @@ files after the copy step.
The upstream repository is https://git.xiph.org/opus.git
-The git tag/revision used was v1.3-rc-19-g5cbd7d5f. +The git tag/revision used was 2654707e86cc94413998976d179b2ab4a2aa3114. diff --git a/media/libopus/celt/arch.h b/media/libopus/celt/arch.h index c627a744bd291..3845c3a08323f 100644 --- a/media/libopus/celt/arch.h +++ b/media/libopus/celt/arch.h @@ -73,6 +73,9 @@ __attribute__((noreturn)) void celt_fatal(const char *str, const char *file, int line) { fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); +#if defined(_MSC_VER) + _set_abort_behavior( 0, _WRITE_ABORT_MSG); +#endif abort(); } #endif @@ -160,7 +163,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR #include "arm/fixed_arm64.h" -#elif OPUS_ARM_INLINE_EDSP +#elif defined (OPUS_ARM_INLINE_EDSP) #include "arm/fixed_armv5e.h" #elif defined (OPUS_ARM_INLINE_ASM) #include "arm/fixed_armv4.h" diff --git a/media/libopus/celt/arm/armcpu.c b/media/libopus/celt/arm/armcpu.c index 694a63b78e687..cce3ae3a9db70 100644 --- a/media/libopus/celt/arm/armcpu.c +++ b/media/libopus/celt/arm/armcpu.c @@ -93,6 +93,8 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
#elif defined(__linux__) /* Linux based */ +#include <stdio.h> + opus_uint32 opus_cpu_capabilities(void) { opus_uint32 flags = 0; diff --git a/media/libopus/celt/bands.c b/media/libopus/celt/bands.c index f7bb66a9358b1..bd54036afe079 100644 --- a/media/libopus/celt/bands.c +++ b/media/libopus/celt/bands.c @@ -371,14 +371,14 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2]) { celt_ener minE; -#if FIXED_POINT +#ifdef FIXED_POINT int shift; #endif minE = MIN32(Ex, Ey); /* Adjustment to make the weights a bit more conservative. */ Ex = ADD32(Ex, minE/3); Ey = ADD32(Ey, minE/3); -#if FIXED_POINT +#ifdef FIXED_POINT shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14; #endif w[0] = VSHR32(Ex, shift); @@ -901,7 +901,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, sctx->itheta = itheta; sctx->qalloc = qalloc; } -static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, +static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, celt_norm *lowband_out) { int c; @@ -926,7 +926,6 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, sign = ec_dec_bits(ec, 1); } ctx->remaining_bits -= 1<<BITRES; - b-=1<<BITRES; } if (ctx->resynth) x[0] = sign ? -NORM_SCALING : NORM_SCALING; @@ -1134,7 +1133,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, /* Special case for one sample */ if (N==1) { - return quant_band_n1(ctx, X, NULL, b, lowband_out); + return quant_band_n1(ctx, X, NULL, lowband_out); }
if (tf_change>0) @@ -1256,7 +1255,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm /* Special case for one sample */ if (N==1) { - return quant_band_n1(ctx, X, Y, b, lowband_out); + return quant_band_n1(ctx, X, Y, lowband_out); }
orig_fill = fill; diff --git a/media/libopus/celt/celt_decoder.c b/media/libopus/celt/celt_decoder.c index 8520e57b1f8de..74ca3b740df77 100644 --- a/media/libopus/celt/celt_decoder.c +++ b/media/libopus/celt/celt_decoder.c @@ -117,13 +117,19 @@ void validate_celt_decoder(CELTDecoder *st) #ifndef CUSTOM_MODES celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL)); celt_assert(st->overlap == 120); + celt_assert(st->end <= 21); +#else +/* From Section 4.3 in the spec: "The normal CELT layer uses 21 of those bands, + though Opus Custom (see Section 6.2) may use a different number of bands" + + Check if it's within the maximum number of Bark frequency bands instead */ + celt_assert(st->end <= 25); #endif celt_assert(st->channels == 1 || st->channels == 2); celt_assert(st->stream_channels == 1 || st->stream_channels == 2); celt_assert(st->downsample > 0); celt_assert(st->start == 0 || st->start == 17); celt_assert(st->start < st->end); - celt_assert(st->end <= 21); #ifdef OPUS_ARCHMASK celt_assert(st->arch >= 0); celt_assert(st->arch <= OPUS_ARCHMASK); @@ -1059,7 +1065,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat ALLOC(pulses, nbEBands, int); ALLOC(fine_priority, nbEBands, int);
- codedBands = compute_allocation(mode, start, end, offsets, cap, + codedBands = clt_compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
diff --git a/media/libopus/celt/celt_encoder.c b/media/libopus/celt/celt_encoder.c index e6a47df616378..d6f8afc20bfb6 100644 --- a/media/libopus/celt/celt_encoder.c +++ b/media/libopus/celt/celt_encoder.c @@ -1571,7 +1571,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); effectiveBytes = nbCompressedBytes - nbFilledBytes; } - equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50); + equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50); if (st->bitrate != OPUS_BITRATE_MAX) equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50));
@@ -2191,7 +2191,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, #endif if (st->lfe) signalBandwidth = 1; - codedBands = compute_allocation(mode, start, end, offsets, cap, + codedBands = clt_compute_allocation(mode, start, end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); if (st->lastCodedBands) diff --git a/media/libopus/celt/celt_lpc.c b/media/libopus/celt/celt_lpc.c index 8ecb693ee984d..242e6df55e6dc 100644 --- a/media/libopus/celt/celt_lpc.c +++ b/media/libopus/celt/celt_lpc.c @@ -50,17 +50,21 @@ int p #endif
OPUS_CLEAR(lpc, p); +#ifdef FIXED_POINT if (ac[0] != 0) +#else + if (ac[0] > 1e-10f) +#endif { for (i = 0; i < p; i++) { /* Sum up this iteration's reflection coefficient */ opus_val32 rr = 0; for (j = 0; j < i; j++) rr += MULT32_32_Q31(lpc[j],ac[i - j]); - rr += SHR32(ac[i + 1],3); - r = -frac_div32(SHL32(rr,3), error); + rr += SHR32(ac[i + 1],6); + r = -frac_div32(SHL32(rr,6), error); /* Update LPC coefficients and total error */ - lpc[i] = SHR32(r,3); + lpc[i] = SHR32(r,6); for (j = 0; j < (i+1)>>1; j++) { opus_val32 tmp1, tmp2; @@ -73,17 +77,61 @@ int p error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); /* Bail out once we get 30 dB gain */ #ifdef FIXED_POINT - if (error<SHR32(ac[0],10)) + if (error<=SHR32(ac[0],10)) break; #else - if (error<.001f*ac[0]) + if (error<=.001f*ac[0]) break; #endif } } #ifdef FIXED_POINT - for (i=0;i<p;i++) - _lpc[i] = ROUND16(lpc[i],16); + { + /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds. + This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug + fixes should also be applied there. */ + int iter, idx = 0; + opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16; + + for (iter = 0; iter < 10; iter++) { + maxabs = 0; + for (i = 0; i < p; i++) { + absval = ABS32(lpc[i]); + if (absval > maxabs) { + maxabs = absval; + idx = i; + } + } + maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */ + + if (maxabs > 32767) { + maxabs = MIN32(maxabs, 163838); + chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14), + SHR32(MULT32_32_32(maxabs, idx + 1), 2)); + chirp_minus_one_Q16 = chirp_Q16 - 65536; + + /* Apply bandwidth expansion. 
*/ + for (i = 0; i < p - 1; i++) { + lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]); + chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16); + } + lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]); + } else { + break; + } + } + + if (iter == 10) { + /* If the coeffs still do not fit into the 16 bit range after 10 iterations, + fall back to the A(z)=1 filter. */ + OPUS_CLEAR(lpc, p); + _lpc[0] = 4096; /* Q12 */ + } else { + for (i = 0; i < p; i++) { + _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */ + } + } + } #endif }
diff --git a/media/libopus/celt/ecintrin.h b/media/libopus/celt/ecintrin.h index 2263cff6bdf30..66a4c36ea40d5 100644 --- a/media/libopus/celt/ecintrin.h +++ b/media/libopus/celt/ecintrin.h @@ -49,7 +49,11 @@ This macro should only be used for implementing ec_ilog(), if it is defined. All other code should use EC_ILOG() instead.*/ #if defined(_MSC_VER) && (_MSC_VER >= 1400) +#if defined(_MSC_VER) && (_MSC_VER >= 1910) +# include <intrin0.h> /* Improve compiler throughput. */ +#else # include <intrin.h> +#endif /*In _DEBUG mode this is not an intrinsic by default.*/ # pragma intrinsic(_BitScanReverse)
diff --git a/media/libopus/celt/fixed_debug.h b/media/libopus/celt/fixed_debug.h index f435295234cff..3765baa60b501 100644 --- a/media/libopus/celt/fixed_debug.h +++ b/media/libopus/celt/fixed_debug.h @@ -410,6 +410,51 @@ static OPUS_INLINE short MULT16_16_16(int a, int b) return res; }
+/* result fits in 32 bits */ +static OPUS_INLINE int MULT32_32_32(opus_int64 a, opus_int64 b) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT32_32_32: inputs are not int: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a*b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT32_32_32: output is not int: %d\n", res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=5; + return res; +} + +static OPUS_INLINE int MULT32_32_Q16(opus_int64 a, opus_int64 b) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT32_32_Q16: inputs are not int: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)(a)*(opus_int64)(b)) >> 16; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT32_32_Q16: output is not int: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=5; + return res; +} + #define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__) static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line) { diff --git a/media/libopus/celt/fixed_generic.h b/media/libopus/celt/fixed_generic.h index 5f4abda76ed8c..8f29d46bb7647 100644 --- a/media/libopus/celt/fixed_generic.h +++ b/media/libopus/celt/fixed_generic.h @@ -57,6 +57,13 @@ #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) #endif
+/** 32x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT32_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),16)) +#else +#define MULT32_32_Q16(a,b) (ADD32(ADD32(ADD32((opus_val32)(SHR32(((opus_uint32)((a)&0x0000ffff)*(opus_uint32)((b)&0x0000ffff)),16)), MULT16_16SU(SHR32(a,16),((b)&0x0000ffff))), MULT16_16SU(SHR32(b,16),((a)&0x0000ffff))), SHL32(MULT16_16(SHR32(a,16),SHR32(b,16)),16))) +#endif + /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #if OPUS_FAST_INT64 #define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31)) @@ -102,9 +109,9 @@
#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
-/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ +/** Shift by a and round-to-nearest 32-bit value. Result is a 16-bit value */ #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) -/** Shift by a and round-to-neareast 32-bit value. Result is a saturated 16-bit value */ +/** Shift by a and round-to-nearest 32-bit value. Result is a saturated 16-bit value */ #define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767));
/** Divide by two */ @@ -131,6 +138,9 @@ /** 16x16 multiplication where the result fits in 16 bits */ #define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
+/** 32x32 multiplication where the result fits in 32 bits */ +#define MULT32_32_32(a,b) ((((opus_val32)(a))*((opus_val32)(b)))) + /* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */ /** 16x16 multiplication where the result fits in 32 bits */ #define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b))) diff --git a/media/libopus/celt/float_cast.h b/media/libopus/celt/float_cast.h index 98b40abcf65f5..9d34976ee216a 100644 --- a/media/libopus/celt/float_cast.h +++ b/media/libopus/celt/float_cast.h @@ -67,6 +67,38 @@ #include <xmmintrin.h> static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)) + + #include <xmmintrin.h> + static OPUS_INLINE opus_int32 float2int(float value) + { + /* _mm_load_ss will generate same code as _mm_set_ss + ** in _MSC_VER >= 1914 /02 so keep __mm_load__ss + ** for backward compatibility. + */ + return _mm_cvtss_si32(_mm_load_ss(&value)); + } + +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) + + #include <math.h> + + /* Win32 doesn't seem to have these functions. + ** Therefore implement OPUS_INLINE versions of these functions here. + */ + + static OPUS_INLINE opus_int32 + float2int (float flt) + { int intgr; + + _asm + { fld flt + fistp intgr + } ; + + return intgr ; + } + #elif defined(HAVE_LRINTF)
/* These defines enable functionality introduced with the 1999 ISO C @@ -96,32 +128,6 @@ static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_s #include <math.h> #define float2int(x) lrint(x)
-#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) - #include <xmmintrin.h> - - __inline long int float2int(float value) - { - return _mm_cvtss_si32(_mm_load_ss(&value)); - } -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) - #include <math.h> - - /* Win32 doesn't seem to have these functions. - ** Therefore implement OPUS_INLINE versions of these functions here. - */ - - __inline long int - float2int (float flt) - { int intgr; - - _asm - { fld flt - fistp intgr - } ; - - return intgr ; - } - #else
#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) diff --git a/media/libopus/celt/mathops.h b/media/libopus/celt/mathops.h index 5e86ff0dd2579..fe29dac1c21df 100644 --- a/media/libopus/celt/mathops.h +++ b/media/libopus/celt/mathops.h @@ -137,7 +137,7 @@ static OPUS_INLINE float celt_log2(float x) } in; in.f = x; integer = (in.i>>23)-127; - in.i -= integer<<23; + in.i -= (opus_uint32)integer<<23; frac = in.f - 1.5f; frac = -0.41445418f + frac*(0.95909232f + frac*(-0.33951290f + frac*0.16541097f)); @@ -160,7 +160,7 @@ static OPUS_INLINE float celt_exp2(float x) /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */ res.f = 0.99992522f + frac * (0.69583354f + frac * (0.22606716f + 0.078024523f*frac)); - res.i = (res.i + (integer<<23)) & 0x7fffffff; + res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff; return res.f; }
diff --git a/media/libopus/celt/mips/celt_mipsr1.h b/media/libopus/celt/mips/celt_mipsr1.h index e85661a6618ce..c332fe0471e31 100644 --- a/media/libopus/celt/mips/celt_mipsr1.h +++ b/media/libopus/celt/mips/celt_mipsr1.h @@ -53,6 +53,7 @@ #include "celt_lpc.h" #include "vq.h"
+#define OVERRIDE_COMB_FILTER_CONST #define OVERRIDE_comb_filter void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, diff --git a/media/libopus/celt/mips/vq_mipsr1.h b/media/libopus/celt/mips/vq_mipsr1.h index fd18eab7a9660..f26a33e755e83 100644 --- a/media/libopus/celt/mips/vq_mipsr1.h +++ b/media/libopus/celt/mips/vq_mipsr1.h @@ -36,8 +36,6 @@ #include "mathops.h" #include "arch.h"
-static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch); - #define OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { @@ -66,11 +64,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ }
#define OVERRIDE_renormalise_vector - -#define renormalise_vector(X, N, gain, arch) \ - (renormalise_vector_mips(X, N, gain, arch)) - -void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) +void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) { int i; #ifdef FIXED_POINT diff --git a/media/libopus/celt/os_support.h b/media/libopus/celt/os_support.h index a2171971e9d65..009bf861daf7f 100644 --- a/media/libopus/celt/os_support.h +++ b/media/libopus/celt/os_support.h @@ -39,7 +39,6 @@ #include "opus_defines.h"
#include <string.h> -#include <stdio.h> #include <stdlib.h>
/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */ diff --git a/media/libopus/celt/rate.c b/media/libopus/celt/rate.c index ca4cc870eaa41..465e1ba26cb4e 100644 --- a/media/libopus/celt/rate.c +++ b/media/libopus/celt/rate.c @@ -529,7 +529,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, return codedBands; }
-int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, +int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) { int lo, hi, len, j; diff --git a/media/libopus/celt/rate.h b/media/libopus/celt/rate.h index 515f7687cec6f..fad5e412dafac 100644 --- a/media/libopus/celt/rate.h +++ b/media/libopus/celt/rate.h @@ -95,7 +95,7 @@ static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int puls @param pulses Number of pulses per band (returned) @return Total number of bits allocated */ -int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, +int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
#endif diff --git a/media/libopus/celt/stack_alloc.h b/media/libopus/celt/stack_alloc.h index 2b51c8d80cc58..ae40e2a1659fa 100644 --- a/media/libopus/celt/stack_alloc.h +++ b/media/libopus/celt/stack_alloc.h @@ -40,7 +40,7 @@ #endif
#ifdef USE_ALLOCA -# ifdef WIN32 +# ifdef _WIN32 # include <malloc.h> # else # ifdef HAVE_ALLOCA_H @@ -102,7 +102,7 @@
#define VARDECL(type, var) type *var
-# ifdef WIN32 +# ifdef _WIN32 # define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size))) # else # define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size))) diff --git a/media/libopus/celt/vq.c b/media/libopus/celt/vq.c index a6b5552d69c52..8011e22548bda 100644 --- a/media/libopus/celt/vq.c +++ b/media/libopus/celt/vq.c @@ -39,6 +39,10 @@ #include "rate.h" #include "pitch.h"
+#if defined(MIPSr1_ASM) +#include "mips/vq_mipsr1.h" +#endif + #ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { diff --git a/media/libopus/celt/vq.h b/media/libopus/celt/vq.h index 0dfe6af058239..45ec55918e322 100644 --- a/media/libopus/celt/vq.h +++ b/media/libopus/celt/vq.h @@ -41,10 +41,6 @@ #include "x86/vq_sse.h" #endif
-#if defined(MIPSr1_ASM) -#include "mips/vq_mipsr1.h" -#endif - void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch); diff --git a/media/libopus/celt/x86/pitch_sse.h b/media/libopus/celt/x86/pitch_sse.h index e5f87ab51af6e..f7a014b6e03a1 100644 --- a/media/libopus/celt/x86/pitch_sse.h +++ b/media/libopus/celt/x86/pitch_sse.h @@ -91,7 +91,7 @@ opus_val32 celt_inner_prod_sse2( int N); #endif
-#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT) +#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) opus_val32 celt_inner_prod_sse( const opus_val16 *x, const opus_val16 *y, diff --git a/media/libopus/celt/x86/pitch_sse4_1.c b/media/libopus/celt/x86/pitch_sse4_1.c index a092c68b24333..2bc578304faf2 100644 --- a/media/libopus/celt/x86/pitch_sse4_1.c +++ b/media/libopus/celt/x86/pitch_sse4_1.c @@ -117,6 +117,14 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 __m128i sum0, sum1, sum2, sum3, vecSum; __m128i initSum;
+#ifdef OPUS_CHECK_ASM + opus_val32 sum_c[4]; + for (j=0;j<4;j++) { + sum_c[j] = sum[j]; + } + xcorr_kernel_c(x, y, sum_c, len); +#endif + celt_assert(len >= 3);
sum0 = _mm_setzero_si128(); @@ -177,19 +185,56 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 vecSum = _mm_add_epi32(vecSum, sum2); }
- for (;j<len;j++) + vecX = OP_CVTEPI16_EPI32_M64(&x[len - 4]); + if (len - j == 3) { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); + vecX0 = _mm_shuffle_epi32(vecX, 0x55); + vecX1 = _mm_shuffle_epi32(vecX, 0xaa); + vecX2 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); + vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); + vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
sum0 = _mm_mullo_epi32(vecX0, vecY0); + sum1 = _mm_mullo_epi32(vecX1, vecY1); + sum2 = _mm_mullo_epi32(vecX2, vecY2); + + vecSum = _mm_add_epi32(vecSum, sum0); + vecSum = _mm_add_epi32(vecSum, sum1); + vecSum = _mm_add_epi32(vecSum, sum2); + } + else if (len - j == 2) + { + vecX0 = _mm_shuffle_epi32(vecX, 0xaa); + vecX1 = _mm_shuffle_epi32(vecX, 0xff); + + vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); + vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); + + sum0 = _mm_mullo_epi32(vecX0, vecY0); + sum1 = _mm_mullo_epi32(vecX1, vecY1); + + vecSum = _mm_add_epi32(vecSum, sum0); + vecSum = _mm_add_epi32(vecSum, sum1); + } + else if (len - j == 1) + { + vecX0 = _mm_shuffle_epi32(vecX, 0xff); + + vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); + + sum0 = _mm_mullo_epi32(vecX0, vecY0); + vecSum = _mm_add_epi32(vecSum, sum0); }
initSum = _mm_loadu_si128((__m128i *)(&sum[0])); initSum = _mm_add_epi32(initSum, vecSum); _mm_storeu_si128((__m128i *)sum, initSum); + +#ifdef OPUS_CHECK_ASM + celt_assert(!memcmp(sum_c, sum, sizeof(sum_c))); +#endif } #endif diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h index 1e2bf17b9b84e..0de8df3556da4 100644 --- a/media/libopus/celt/x86/x86cpu.h +++ b/media/libopus/celt/x86/x86cpu.h @@ -56,40 +56,10 @@ int opus_select_arch(void); # endif
-/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() - or _mm_cvtepi16_epi32() when optimizations are disabled, even though the - actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory - reference, these require 16-byte alignment and load a full 16 bytes (instead - of 4 or 8), possibly reading out of bounds. - - We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or - _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 - reference in the PMOVSXWD instruction itself, but gcc is not smart enough to - optimize this out when optimizations ARE enabled. - - Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 - (which is fair, since technically the compiler is always allowed to do the - dereference before invoking the function implementing the intrinsic). - However, it is smart enough to eliminate the extra MOVD instruction. - For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out - the extra MOVQ if it's specified explicitly */ - -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI8_EPI32_M32(x) \ +#define OP_CVTEPI8_EPI32_M32(x) \ (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) -# else -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(*(__m128i *)(x))) -#endif
-/* similar reasoning about the instruction sequence as in the 32-bit macro above, - */ -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI16_EPI32_M64(x) \ +#define OP_CVTEPI16_EPI32_M64(x) \ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) -# else -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(*(__m128i *)(x))) -# endif
#endif diff --git a/media/libopus/include/opus_custom.h b/media/libopus/include/opus_custom.h index 41f36bf2fbc90..2227be011bd2f 100644 --- a/media/libopus/include/opus_custom.h +++ b/media/libopus/include/opus_custom.h @@ -178,7 +178,7 @@ OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encode ) OPUS_ARG_NONNULL(1);
-/** Destroys a an encoder state. +/** Destroys an encoder state. * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed. */ OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st); @@ -286,7 +286,7 @@ OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decode int *error ) OPUS_ARG_NONNULL(1);
-/** Destroys a an decoder state. +/** Destroys a decoder state. * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed. */ OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st); diff --git a/media/libopus/include/opus_defines.h b/media/libopus/include/opus_defines.h index fbf5d0eb74fd9..ceee5b840cf5b 100644 --- a/media/libopus/include/opus_defines.h +++ b/media/libopus/include/opus_defines.h @@ -64,7 +64,7 @@ extern "C" { /**Export control for opus functions */
#ifndef OPUS_EXPORT -# if defined(WIN32) +# if defined(_WIN32) # if defined(OPUS_BUILD) && defined(DLL_EXPORT) # define OPUS_EXPORT __declspec(dllexport) # else @@ -168,6 +168,7 @@ extern "C" { /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ #define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046 #define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047 +#define OPUS_GET_IN_DTX_REQUEST 4049
/** Defines for the presence of extended APIs. */ #define OPUS_HAVE_OPUS_PROJECTION_H @@ -715,6 +716,16 @@ extern "C" { * </dl> * @hideinitializer */ #define OPUS_GET_PHASE_INVERSION_DISABLED(x) OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int_ptr(x) +/** Gets the DTX state of the encoder. + * Returns whether the last encoded frame was either a comfort noise update + * during DTX or not encoded because of DTX. + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>0</dt><dd>The encoder is not in DTX.</dd> + * <dt>1</dt><dd>The encoder is in DTX.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_IN_DTX(x) OPUS_GET_IN_DTX_REQUEST, __opus_check_int_ptr(x)
/**@}*/
diff --git a/media/libopus/moz.build b/media/libopus/moz.build index 37be53beb6850..7b9ac6568612d 100644 --- a/media/libopus/moz.build +++ b/media/libopus/moz.build @@ -20,7 +20,7 @@ AllowCompilerWarnings() FINAL_LIBRARY = 'gkmedias'
DEFINES['OPUS_BUILD'] = True -DEFINES['OPUS_VERSION'] = '"v1.3-rc-19-g5cbd7d5f-mozilla"' +DEFINES['OPUS_VERSION'] = '2654707e86cc94413998976d179b2ab4a2aa3114' DEFINES['USE_ALLOCA'] = True DEFINES['ENABLE_HARDENING'] = True
diff --git a/media/libopus/nonunified2.patch b/media/libopus/nonunified2.patch index 762d9ee63bc83..02241de5230e8 100644 --- a/media/libopus/nonunified2.patch +++ b/media/libopus/nonunified2.patch @@ -1,5 +1,3 @@ -diff --git a/media/libopus/nonunified2.patch b/media/libopus/nonunified2.patch -new file mode 100644 diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild --- a/media/libopus/sources.mozbuild +++ b/media/libopus/sources.mozbuild diff --git a/media/libopus/silk/CNG.c b/media/libopus/silk/CNG.c index ef8e38df9ffc2..2a910099ed36e 100644 --- a/media/libopus/silk/CNG.c +++ b/media/libopus/silk/CNG.c @@ -118,6 +118,10 @@ void silk_CNG( /* Smooth gains */ for( i = 0; i < psDec->nb_subfr; i++ ) { psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 ); + /* If the smoothed gain is 3 dB greater than this subframe's gain, use this subframe's gain to adapt faster. */ + if( silk_SMULWW( psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_THRESHOLD_Q16 ) > psDecCtrl->Gains_Q16[ i ] ) { + psCNG->CNG_smth_Gain_Q16 = psDecCtrl->Gains_Q16[ i ]; + } } }
diff --git a/media/libopus/silk/LPC_fit.c b/media/libopus/silk/LPC_fit.c index cdea4f3abcaf7..c0690a1fc9a84 100644 --- a/media/libopus/silk/LPC_fit.c +++ b/media/libopus/silk/LPC_fit.c @@ -31,7 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FIX.h"
-/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */ +/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around. + This logic is reused in _celt_lpc(). Any bug fixes should also be applied there. */ void silk_LPC_fit( opus_int16 *a_QOUT, /* O Output signal */ opus_int32 *a_QIN, /* I/O Input signal */ diff --git a/media/libopus/silk/MacroCount.h b/media/libopus/silk/MacroCount.h index 78100ffedeb29..dab2f57a68a25 100644 --- a/media/libopus/silk/MacroCount.h +++ b/media/libopus/silk/MacroCount.h @@ -27,9 +27,9 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef SIGPROCFIX_API_MACROCOUNT_H #define SIGPROCFIX_API_MACROCOUNT_H -#include <stdio.h>
#ifdef silk_MACRO_COUNT +#include <stdio.h> #define varDefine opus_int64 ops_count = 0;
extern opus_int64 ops_count; diff --git a/media/libopus/silk/NSQ.c b/media/libopus/silk/NSQ.c index 1d64d8e257b85..45dd45ce8dc63 100644 --- a/media/libopus/silk/NSQ.c +++ b/media/libopus/silk/NSQ.c @@ -75,21 +75,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
void silk_NSQ_c ( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I 
LTP state scaling */ ) { opus_int k, lag, start_idx, LSF_interpolation_flag; @@ -173,9 +173,9 @@ void silk_NSQ_c RESTORE_STACK; }
-/***********************************/ -/* silk_noise_shape_quantizer */ -/***********************************/ +/******************************/ +/* silk_noise_shape_quantizer */ +/******************************/
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) static OPUS_INLINE @@ -262,7 +262,7 @@ void silk_noise_shape_quantizer( tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); shp_lag_ptr++; diff --git a/media/libopus/silk/NSQ_del_dec.c b/media/libopus/silk/NSQ_del_dec.c index 3fd9fa0d5b722..41f3fc93ef171 100644 --- a/media/libopus/silk/NSQ_del_dec.c +++ b/media/libopus/silk/NSQ_del_dec.c @@ -115,21 +115,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( );
void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int 
LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; @@ -394,8 +394,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Long-term shaping */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ shp_lag_ptr++; } else { @@ -451,9 +451,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Input minus prediction plus noise feedback */ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp1 = silk_ADD_SAT32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_SUB_SAT32( tmp2, tmp1 ); /* Q13 */ tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ @@ -535,7 +535,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Update states */ psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 0 ].xq_Q14 = xq_Q14; @@ -555,7 +555,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Update states */ psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 1 ].xq_Q14 = xq_Q14; diff --git a/media/libopus/silk/PLC.c b/media/libopus/silk/PLC.c index f89391651cdf9..4667440db2b7f 100644 --- a/media/libopus/silk/PLC.c +++ b/media/libopus/silk/PLC.c @@ -328,10 +328,8 @@ static OPUS_INLINE void silk_PLC_conceal( for( j = 0; j < LTP_ORDER; j++ ) { B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); } - if ( psDec->indices.signalType != TYPE_NO_VOICE_ACTIVITY ) { - /* Gradually reduce excitation gain */ - rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); - } + /* Gradually reduce excitation gain */ + rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
/* Slowly increase pitch lag */ psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); diff --git a/media/libopus/silk/SigProc_FIX.h b/media/libopus/silk/SigProc_FIX.h index f9ae326326d31..1d9bf2f16e45b 100644 --- a/media/libopus/silk/SigProc_FIX.h +++ b/media/libopus/silk/SigProc_FIX.h @@ -381,7 +381,7 @@ opus_int32 silk_inner_prod_aligned_scale( const opus_int len /* I vector lengths */ );
-opus_int64 silk_inner_prod16_aligned_64_c( +opus_int64 silk_inner_prod16_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ @@ -613,8 +613,8 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
-#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len)) +#define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len)) #endif
#include "Inlines.h" diff --git a/media/libopus/silk/VAD.c b/media/libopus/silk/VAD.c index 541e5056ff697..d0cda52162c1c 100644 --- a/media/libopus/silk/VAD.c +++ b/media/libopus/silk/VAD.c @@ -312,6 +312,8 @@ void silk_VAD_GetNoiseLevels( /* Initially faster smoothing */ if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */ min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 ); + /* Increment frame counter */ + psSilk_VAD->counter++; } else { min_coef = 0; } @@ -355,7 +357,4 @@ void silk_VAD_GetNoiseLevels( /* Store as part of state */ psSilk_VAD->NL[ k ] = nl; } - - /* Increment frame counter */ - psSilk_VAD->counter++; } diff --git a/media/libopus/silk/VQ_WMat_EC.c b/media/libopus/silk/VQ_WMat_EC.c index 0f3d545c4ef8c..245a7e4b06d17 100644 --- a/media/libopus/silk/VQ_WMat_EC.c +++ b/media/libopus/silk/VQ_WMat_EC.c @@ -64,7 +64,7 @@ void silk_VQ_WMat_EC_c( *rate_dist_Q8 = silk_int32_MAX; *res_nrg_Q15 = silk_int32_MAX; cb_row_Q7 = cb_Q7; - /* In things go really bad, at least *ind is set to something safe. */ + /* If things go really bad, at least *ind is set to something safe. 
*/ *ind = 0; for( k = 0; k < L; k++ ) { opus_int32 penalty; @@ -115,7 +115,7 @@ void silk_VQ_WMat_EC_c( if( sum1_Q15 >= 0 ) { /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */ bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) ); - /* In the following line we reduce the codelength component by half ("-1"); seems to slghtly improve quality */ + /* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */ bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 ); if( bits_tot_Q8 <= *rate_dist_Q8 ) { *rate_dist_Q8 = bits_tot_Q8; diff --git a/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c b/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c index ab426bcd66f8b..726e6667b4a51 100644 --- a/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c +++ b/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c @@ -210,19 +210,23 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi /* Increase Q domain of the AR coefficients */ t0_s16x8 = vld1q_s16( A_Q12 + 0 ); t1_s16x8 = vld1q_s16( A_Q12 + 8 ); - t2_s16x8 = vld1q_s16( A_Q12 + 16 ); + if ( order > 16 ) { + t2_s16x8 = vld1q_s16( A_Q12 + 16 ); + } t0_s32x4 = vpaddlq_s16( t0_s16x8 );
switch( order - leftover ) { case 24: t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 ); + vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) ); + vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) ); /* FALLTHROUGH */
case 16: t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 ); - vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) ); + vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) ); + vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) ); /* FALLTHROUGH */
case 8: @@ -230,8 +234,8 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) ); const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 ); DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 ); - vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) ); + vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) ); + vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) ); } break;
@@ -246,16 +250,22 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi case 6: DC_resp += (opus_int32)A_Q12[ 5 ]; DC_resp += (opus_int32)A_Q12[ 4 ]; + Atmp_QA[ order - leftover + 5 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 5 ], QA - 12 ); + Atmp_QA[ order - leftover + 4 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 4 ], QA - 12 ); /* FALLTHROUGH */
case 4: DC_resp += (opus_int32)A_Q12[ 3 ]; DC_resp += (opus_int32)A_Q12[ 2 ]; + Atmp_QA[ order - leftover + 3 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 3 ], QA - 12 ); + Atmp_QA[ order - leftover + 2 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 2 ], QA - 12 ); /* FALLTHROUGH */
case 2: DC_resp += (opus_int32)A_Q12[ 1 ]; DC_resp += (opus_int32)A_Q12[ 0 ]; + Atmp_QA[ order - leftover + 1 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 1 ], QA - 12 ); + Atmp_QA[ order - leftover + 0 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 0 ], QA - 12 ); /* FALLTHROUGH */
default: @@ -266,8 +276,6 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi if( DC_resp >= 4096 ) { invGain_Q30 = 0; } else { - vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) ); - vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) ); invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order ); } } diff --git a/media/libopus/silk/bwexpander_32.c b/media/libopus/silk/bwexpander_32.c index d0010f73dfd50..0f32b9df1a1d7 100644 --- a/media/libopus/silk/bwexpander_32.c +++ b/media/libopus/silk/bwexpander_32.c @@ -31,7 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FIX.h"
-/* Chirp (bandwidth expand) LP AR filter */ +/* Chirp (bandwidth expand) LP AR filter. + This logic is reused in _celt_lpc(). Any bug fixes should also be applied there. */ void silk_bwexpander_32( opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ const opus_int d, /* I Length of ar */ diff --git a/media/libopus/silk/debug.c b/media/libopus/silk/debug.c index 9253faf71bfe3..eb0c36ef1f9a0 100644 --- a/media/libopus/silk/debug.c +++ b/media/libopus/silk/debug.c @@ -30,18 +30,20 @@ POSSIBILITY OF SUCH DAMAGE. #endif
#include "debug.h" + +#if SILK_DEBUG || SILK_TIC_TOC #include "SigProc_FIX.h" +#endif
#if SILK_TIC_TOC
-#ifdef _WIN32 - #if (defined(_WIN32) || defined(_WINCE)) #include <windows.h> /* timer */ #else /* Linux or Mac*/ #include <sys/time.h> #endif
+#ifdef _WIN32 unsigned long silk_GetHighResolutionTime(void) /* O time in usec*/ { /* Returns a time counter in microsec */ @@ -65,7 +67,7 @@ unsigned long GetHighResolutionTime(void) /* O time in usec*/ int silk_Timer_nTimers = 0; int silk_Timer_depth_ctr = 0; char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; -#ifdef WIN32 +#ifdef _WIN32 LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; #else unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; @@ -76,7 +78,7 @@ opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX];
-#ifdef WIN32 +#ifdef _WIN32 void silk_TimerSave(char *file_name) { if( silk_Timer_nTimers > 0 ) diff --git a/media/libopus/silk/debug.h b/media/libopus/silk/debug.h index 6f68c1ca0f022..36163e478d305 100644 --- a/media/libopus/silk/debug.h +++ b/media/libopus/silk/debug.h @@ -28,28 +28,29 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_DEBUG_H #define SILK_DEBUG_H
-#include "typedef.h" -#include <stdio.h> /* file writing */ -#include <string.h> /* strcpy, strcmp */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -unsigned long GetHighResolutionTime(void); /* O time in usec*/ - /* Set to 1 to enable DEBUG_STORE_DATA() macros for dumping * intermediate signals from the codec. */ #define SILK_DEBUG 0
/* Flag for using timers */ -#define SILK_TIC_TOC 0 +#define SILK_TIC_TOC 0
+#if SILK_DEBUG || SILK_TIC_TOC +#include "typedef.h" +#include <string.h> /* strcpy, strcmp */ +#include <stdio.h> /* file writing */ +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif
#if SILK_TIC_TOC
+unsigned long GetHighResolutionTime(void); /* O time in usec*/ + #if (defined(_WIN32) || defined(_WINCE)) #include <windows.h> /* timer */ #else /* Linux or Mac*/ diff --git a/media/libopus/silk/decode_frame.c b/media/libopus/silk/decode_frame.c index e73825b2676f9..4f36f854c2b59 100644 --- a/media/libopus/silk/decode_frame.c +++ b/media/libopus/silk/decode_frame.c @@ -97,7 +97,6 @@ opus_int silk_decode_frame( psDec->first_frame_after_reset = 0; } else { /* Handle packet loss by extrapolation */ - psDec->indices.signalType = psDec->prevSignalType; silk_PLC( psDec, psDecCtrl, pOut, 1, arch ); }
diff --git a/media/libopus/silk/define.h b/media/libopus/silk/define.h index 247cb0bf71d83..491c86f33e9f5 100644 --- a/media/libopus/silk/define.h +++ b/media/libopus/silk/define.h @@ -225,6 +225,7 @@ extern "C" /* Defines for CN generation */ #define CNG_BUF_MASK_MAX 255 /* 2^floor(log2(MAX_FRAME_LENGTH))-1 */ #define CNG_GAIN_SMTH_Q16 4634 /* 0.25^(1/4) */ +#define CNG_GAIN_SMTH_THRESHOLD_Q16 46396 /* -3 dB */ #define CNG_NLSF_SMTH_Q16 16348 /* 0.25 */
#ifdef __cplusplus diff --git a/media/libopus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c b/media/libopus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c index 00a70cb51ffbf..6f3be025cc375 100644 --- a/media/libopus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c +++ b/media/libopus/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c @@ -84,7 +84,9 @@ void silk_warped_autocorrelation_FIX_neon( silk_assert( ( order & 1 ) == 0 ); silk_assert( 2 * QS - QC >= 0 );
- ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER, opus_int32 ); + /* The additional +4 is to ensure a later vld1q_s32 call does not overflow. */ + /* Strictly, only +3 is needed but +4 simplifies initialization using the 4x32 neon load. */ + ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER + 4, opus_int32 );
input_QS = input_QST; /* input_QS has zero paddings in the beginning and end. */ @@ -121,6 +123,8 @@ void silk_warped_autocorrelation_FIX_neon( vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); input_QS += 4; vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); + input_QS += 4; + vst1q_s32( input_QS, vdupq_n_s32( 0 ) ); input_QS = input_QST + MAX_SHAPE_LPC_ORDER - orderT;
/* The following loop runs ( length + order ) times, with ( order ) extra epilogues. */ @@ -153,7 +157,8 @@ void silk_warped_autocorrelation_FIX_neon( opus_int o = orderT; int32x4_t state_QS_s32x4[ 3 ][ 2 ];
- ALLOC( state, length + orderT, opus_int32 ); + /* The additional +4 is to ensure a later vld1q_s32 call does not overflow. */ + ALLOC( state, length + order + 4, opus_int32 ); state_QS_s32x4[ 2 ][ 1 ] = vdupq_n_s32( 0 );
/* Calculate 8 taps of all inputs in each loop. */ diff --git a/media/libopus/silk/fixed/burg_modified_FIX.c b/media/libopus/silk/fixed/burg_modified_FIX.c index 274d4b28e1125..185a12b178f73 100644 --- a/media/libopus/silk/fixed/burg_modified_FIX.c +++ b/media/libopus/silk/fixed/burg_modified_FIX.c @@ -68,7 +68,7 @@ void silk_burg_modified_c( celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */ - C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); + C0_64 = silk_inner_prod16( x, x, subfr_length*nb_subfr, arch ); lz = silk_CLZ64(C0_64); rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; @@ -87,7 +87,7 @@ void silk_burg_modified_c( x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); + silk_inner_prod16( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); } } } else { @@ -150,7 +150,7 @@ void silk_burg_modified_c( C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32), + /* We sometimes get overflows in the multiplications (even beyond +/- 2^32), but they cancel each other and the real result seems to always fit in a 32-bit signed integer. This was determined experimentally, not theoretically (unfortunately). 
*/ tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ @@ -253,7 +253,7 @@ void silk_burg_modified_c( if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16( x_ptr, x_ptr, D, arch ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { diff --git a/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h index fcbd96c88d98a..66eb2ed26d11e 100644 --- a/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ b/media/libopus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -41,15 +41,14 @@ POSSIBILITY OF SUCH DAMAGE. #define QS 14
/* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX -void silk_warped_autocorrelation_FIX( +#define OVERRIDE_silk_warped_autocorrelation_FIX_c +void silk_warped_autocorrelation_FIX_c( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ const opus_int warping_Q16, /* I Warping coefficient */ const opus_int length, /* I Length of input */ - const opus_int order, /* I Correlation order (even) */ - int arch /* I Run-time architecture */ + const opus_int order /* I Correlation order (even) */ ) { opus_int n, i, lsh; diff --git a/media/libopus/silk/fixed/vector_ops_FIX.c b/media/libopus/silk/fixed/vector_ops_FIX.c index d94980014f624..dcf84070a6b00 100644 --- a/media/libopus/silk/fixed/vector_ops_FIX.c +++ b/media/libopus/silk/fixed/vector_ops_FIX.c @@ -87,7 +87,7 @@ opus_int32 silk_inner_prod_aligned( #endif }
-opus_int64 silk_inner_prod16_aligned_64_c( +opus_int64 silk_inner_prod16_c( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ diff --git a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c index 52002a111837c..5c79553bc001b 100644 --- a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c +++ b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c @@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* Autocorrelations for a warped frequency axis */ +#ifndef OVERRIDE_silk_warped_autocorrelation_FIX_c void silk_warped_autocorrelation_FIX_c( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ @@ -88,3 +89,4 @@ void silk_warped_autocorrelation_FIX_c( } silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX_c */ diff --git a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c index bbb1ce0fcc458..e58bf079e5a25 100644 --- a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c +++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -42,7 +42,7 @@ #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
#define QA 25 -#define N_BITS_HEAD_ROOM 2 +#define N_BITS_HEAD_ROOM 3 #define MIN_RSHIFTS -16 #define MAX_RSHIFTS (32 - QA)
@@ -59,7 +59,7 @@ void silk_burg_modified_sse4_1( int arch /* I Run-time architecture */ ) { - opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; + opus_int k, n, s, lz, rshifts, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; @@ -68,6 +68,7 @@ void silk_burg_modified_sse4_1( opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; + opus_int64 C0_64;
__m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); @@ -75,23 +76,18 @@ void silk_burg_modified_sse4_1( celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */ - silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); - if( rshifts > MAX_RSHIFTS ) { - C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); - silk_assert( C0 > 0 ); - rshifts = MAX_RSHIFTS; + C0_64 = silk_inner_prod16( x, x, subfr_length*nb_subfr, arch ); + lz = silk_CLZ64(C0_64); + rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; + if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; + if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; + + if (rshifts > 0) { + C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); } else { - lz = silk_CLZ32( C0 ) - 1; - rshifts_extra = N_BITS_HEAD_ROOM - lz; - if( rshifts_extra > 0 ) { - rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); - C0 = silk_RSHIFT32( C0, rshifts_extra ); - } else { - rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); - C0 = silk_LSHIFT32( C0, -rshifts_extra ); - } - rshifts += rshifts_extra; + C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts ); } + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); if( rshifts > 0 ) { @@ -99,7 +95,7 @@ void silk_burg_modified_sse4_1( x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); + silk_inner_prod16( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); } } } else { @@ -203,8 +199,11 @@ void silk_burg_modified_sse4_1( C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ + /* We 
sometimes get overflows in the multiplications (even beyond +/- 2^32), + but they cancel each other and the real result seems to always fit in a 32-bit + signed integer. This was determined experimentally, not theoretically (unfortunately). */ + tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ + tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ }
tmp1 = -tmp1; /* Q17 */ @@ -350,7 +349,7 @@ void silk_burg_modified_sse4_1( if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16( x_ptr, x_ptr, D, arch ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { @@ -374,4 +373,28 @@ void silk_burg_modified_sse4_1( *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ *res_nrg_Q = -rshifts; } + +#ifdef OPUS_CHECK_ASM + { + opus_int32 res_nrg_c = 0; + opus_int res_nrg_Q_c = 0; + opus_int32 A_Q16_c[ MAX_LPC_ORDER ] = {0}; + + silk_burg_modified_c( + &res_nrg_c, + &res_nrg_Q_c, + A_Q16_c, + x, + minInvGain_Q30, + subfr_length, + nb_subfr, + D, + 0 + ); + + silk_assert( *res_nrg == res_nrg_c ); + silk_assert( *res_nrg_Q == res_nrg_Q_c ); + silk_assert( !memcmp( A_Q16, A_Q16_c, D * sizeof( *A_Q16 ) ) ); + } +#endif } diff --git a/media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c index c1e90564d0ed7..0cfb08d902012 100644 --- a/media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c +++ b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c @@ -37,39 +37,36 @@ #include "SigProc_FIX.h" #include "pitch.h"
-opus_int64 silk_inner_prod16_aligned_64_sse4_1( +opus_int64 silk_inner_prod16_sse4_1( const opus_int16 *inVec1, /* I input vector 1 */ const opus_int16 *inVec2, /* I input vector 2 */ const opus_int len /* I vector lengths */ ) { - opus_int i, dataSize8; + opus_int i, dataSize4; opus_int64 sum;
- __m128i xmm_tempa; - __m128i inVec1_76543210, acc1; - __m128i inVec2_76543210, acc2; + __m128i xmm_prod_20, xmm_prod_31; + __m128i inVec1_3210, acc1; + __m128i inVec2_3210, acc2;
sum = 0; - dataSize8 = len & ~7; + dataSize4 = len & ~3;
acc1 = _mm_setzero_si128(); acc2 = _mm_setzero_si128();
- for( i = 0; i < dataSize8; i += 8 ) { - inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) ); - inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) ); + for( i = 0; i < dataSize4; i += 4 ) { + inVec1_3210 = OP_CVTEPI16_EPI32_M64( &inVec1[i + 0] ); + inVec2_3210 = OP_CVTEPI16_EPI32_M64( &inVec2[i + 0] ); + xmm_prod_20 = _mm_mul_epi32( inVec1_3210, inVec2_3210 );
- /* only when all 4 operands are -32768 (0x8000), this results in wrap around */ - inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 ); + inVec1_3210 = _mm_shuffle_epi32( inVec1_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + inVec2_3210 = _mm_shuffle_epi32( inVec2_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_prod_31 = _mm_mul_epi32( inVec1_3210, inVec2_3210 );
- xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 ); - /* equal shift right 8 bytes */ - inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 ); - - acc1 = _mm_add_epi64( acc1, xmm_tempa ); - acc2 = _mm_add_epi64( acc2, inVec1_76543210 ); + acc1 = _mm_add_epi64( acc1, xmm_prod_20 ); + acc2 = _mm_add_epi64( acc2, xmm_prod_31 ); }
acc1 = _mm_add_epi64( acc1, acc2 ); @@ -84,5 +81,12 @@ opus_int64 silk_inner_prod16_aligned_64_sse4_1( sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); }
+#ifdef OPUS_CHECK_ASM + { + opus_int64 sum_c = silk_inner_prod16_c( inVec1, inVec2, len ); + silk_assert( sum == sum_c ); + } +#endif + return sum; } diff --git a/media/libopus/silk/float/warped_autocorrelation_FLP.c b/media/libopus/silk/float/warped_autocorrelation_FLP.c index 96662767cf3b0..09186e73d463d 100644 --- a/media/libopus/silk/float/warped_autocorrelation_FLP.c +++ b/media/libopus/silk/float/warped_autocorrelation_FLP.c @@ -42,8 +42,8 @@ void silk_warped_autocorrelation_FLP( { opus_int n, i; double tmp1, tmp2; - double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
/* Order must be even */ celt_assert( ( order & 1 ) == 0 ); diff --git a/media/libopus/silk/main.h b/media/libopus/silk/main.h index 1a33eed549b6a..a5f568758ffb6 100644 --- a/media/libopus/silk/main.h +++ b/media/libopus/silk/main.h @@ -247,21 +247,21 @@ void silk_VQ_WMat_EC_c( /************************************/
void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion 
tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
#if !defined(OVERRIDE_silk_NSQ) @@ -273,21 +273,21 @@ void silk_NSQ_c(
/* Noise shaping using delayed decision */ void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + 
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
#if !defined(OVERRIDE_silk_NSQ_del_dec) diff --git a/media/libopus/silk/typedef.h b/media/libopus/silk/typedef.h index 97b7e709be5bc..793d2c0c1d381 100644 --- a/media/libopus/silk/typedef.h +++ b/media/libopus/silk/typedef.h @@ -67,6 +67,9 @@ __attribute__((noreturn)) static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line) { fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); +#if defined(_MSC_VER) + _set_abort_behavior( 0, _WRITE_ABORT_MSG); +#endif abort(); } # define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}} diff --git a/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c b/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c index 2c75ede2dde8c..42735c528bf49 100644 --- a/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c +++ b/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -46,6 +46,7 @@ typedef struct { opus_int32 Shape_Q14[ DECISION_DELAY ]; opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; opus_int32 LF_AR_Q14; + opus_int32 Diff_Q14; opus_int32 Seed; opus_int32 SeedInit; opus_int32 RD_Q10; @@ -56,6 +57,7 @@ typedef struct { opus_int32 RD_Q10; opus_int32 xq_Q14; opus_int32 LF_AR_Q14; + opus_int32 Diff_Q14; opus_int32 sLTP_shp_Q14; opus_int32 LPC_exc_Q14; } NSQ_sample_struct; @@ -66,7 +68,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ + const opus_int16 x16[], /* I Input */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -112,21 +114,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( );
void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int 
Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; @@ -142,8 +144,39 @@ void silk_NSQ_del_dec_sse4_1( VARDECL( opus_int32, delayedGain_Q10 ); VARDECL( NSQ_del_dec_struct, psDelDec ); NSQ_del_dec_struct *psDD; +#ifdef OPUS_CHECK_ASM + silk_nsq_state NSQ_c; + SideInfoIndices psIndices_c; + opus_int8 pulses_c[ MAX_FRAME_LENGTH ]; + const opus_int8 *const pulses_a = pulses; +#endif SAVE_STACK;
+#ifdef OPUS_CHECK_ASM + ( void )pulses_a; + silk_memcpy( &NSQ_c, NSQ, sizeof( NSQ_c ) ); + silk_memcpy( &psIndices_c, psIndices, sizeof( psIndices_c ) ); + silk_assert( psEncC->nb_subfr * psEncC->subfr_length <= MAX_FRAME_LENGTH ); + silk_memcpy( pulses_c, pulses, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ); + silk_NSQ_del_dec_c( + psEncC, + &NSQ_c, + &psIndices_c, + x16, + pulses_c, + PredCoef_Q12, + LTPCoef_Q14, + AR_Q13, + HarmShapeGain_Q14, + Tilt_Q14, + LF_shp_Q14, + Gains_Q16, + pitchL, + Lambda_Q10, + LTP_scale_Q14 + ); +#endif + /* Set unvoiced lag to the previous one, overwrite later for voiced */ lag = NSQ->lagPrev;
@@ -158,6 +191,7 @@ void silk_NSQ_del_dec_sse4_1( psDD->SeedInit = psDD->Seed; psDD->RD_Q10 = 0; psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; + psDD->Diff_Q14 = NSQ->sDiff_shp_Q14; psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); @@ -185,8 +219,7 @@ void silk_NSQ_del_dec_sse4_1( LSF_interpolation_flag = 1; }
- ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); @@ -198,7 +231,7 @@ void silk_NSQ_del_dec_sse4_1( for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -257,7 +290,7 @@ void silk_NSQ_del_dec_sse4_1( } }
- silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, + silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k, psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, @@ -265,7 +298,7 @@ void silk_NSQ_del_dec_sse4_1( Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
- x_Q3 += psEncC->subfr_length; + x16 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -288,6 +321,7 @@ void silk_NSQ_del_dec_sse4_1( for( i = 0; i < decisionDelay; i++ ) { last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); @@ -298,11 +332,19 @@ void silk_NSQ_del_dec_sse4_1(
/* Update states */ NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; + NSQ->sDiff_shp_Q14 = psDD->Diff_Q14; NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech signal */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + +#ifdef OPUS_CHECK_ASM + silk_assert( !memcmp( &NSQ_c, NSQ, sizeof( NSQ_c ) ) ); + silk_assert( !memcmp( &psIndices_c, psIndices, sizeof( psIndices_c ) ) ); + silk_assert( !memcmp( pulses_c, pulses_a, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ) ); +#endif + RESTORE_STACK; }
@@ -345,6 +387,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; + VARDECL( NSQ_sample_pair, psSampleState ); NSQ_del_dec_struct *psDD; NSQ_sample_struct *psSS; @@ -356,6 +399,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( celt_assert( nStatesDelayedDecision > 0 ); ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+ int rdo_offset = (Lambda_Q10 >> 1) - 512; + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); @@ -407,8 +452,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( /* Long-term shaping */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ shp_lag_ptr++; } else { @@ -478,7 +523,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
- /* setp 4 */ + /* step 4 */ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); @@ -511,9 +556,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + celt_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); + tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 ); /* Output of allpass section */ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); psDD->sAR2_Q14[ 0 ] = tmp2; @@ -543,9 +588,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
/* Input minus prediction plus noise feedback */ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp1 = silk_ADD_SAT32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_SUB_SAT32( tmp2, tmp1 ); /* Q13 */ tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ @@ -559,6 +604,18 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if (Lambda_Q10 > 2048) { + /* For aggressive RDO, the bias becomes more than one pulse. */ + if (q1_Q10 > rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); + } else if (q1_Q10 < -rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); + } else if (q1_Q10 < 0) { + q1_Q0 = -1; + } else { + q1_Q0 = 0; + } + } if( q1_Q0 > 0 ) { q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); @@ -612,8 +669,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 0 ].xq_Q14 = xq_Q14; @@ -626,14 +684,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( exc_Q14 = -exc_Q14; }
- /* Add predictions */ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB_SAT32( sLF_AR_shp_Q14, n_LF_Q14 ); psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; psSS[ 1 ].xq_Q14 = xq_Q14; @@ -705,6 +763,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( psDD = &psDelDec[ k ]; psSS = &psSampleState[ k ][ 0 ]; psDD->LF_AR_Q14 = psSS->LF_AR_Q14; + psDD->Diff_Q14 = psSS->Diff_Q14; psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; @@ -728,7 +787,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ + const opus_int16 x16[], /* I Input */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ @@ -742,51 +801,41 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( ) { opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; NSQ_del_dec_struct *psDD; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + __m128i xmm_inv_gain_Q26, xmm_x16_x2x0, xmm_x16_x3x1;
lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 );
- /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
- /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + /* prepare inv_gain_Q26 in packed 4 32-bits */ + xmm_inv_gain_Q26 = _mm_set1_epi32(inv_gain_Q26);
for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + xmm_x16_x2x0 = OP_CVTEPI16_EPI32_M64( &(x16[ i ] ) ); + /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_x16_x3x1 = _mm_shuffle_epi32( xmm_x16_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
- xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + xmm_x16_x2x0 = _mm_mul_epi32( xmm_x16_x2x0, xmm_inv_gain_Q26 ); + xmm_x16_x3x1 = _mm_mul_epi32( xmm_x16_x3x1, xmm_inv_gain_Q26 );
- xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + xmm_x16_x2x0 = _mm_srli_epi64( xmm_x16_x2x0, 16 ); + xmm_x16_x3x1 = _mm_slli_epi64( xmm_x16_x3x1, 16 );
- xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 ); }
for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); }
- /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -800,7 +849,9 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( }
/* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + /* Scale long-term shaping state */ { __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; @@ -841,6 +892,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
/* Scale scalar states */ psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); + psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 );
/* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -855,5 +907,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( } } } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/media/libopus/silk/x86/NSQ_sse4_1.c b/media/libopus/silk/x86/NSQ_sse4_1.c index b0315e35fc896..a2a74659bbe64 100644 --- a/media/libopus/silk/x86/NSQ_sse4_1.c +++ b/media/libopus/silk/x86/NSQ_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -37,17 +37,17 @@ #include "stack_alloc.h"
static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int16 x16[], /* I input */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ );
static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( @@ -65,27 +65,28 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int Tilt_Q14, /* I Spectral tilt */ opus_int32 LF_shp_Q14, /* I */ opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int32 table[][4] /* I */ );
void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* 
I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) { opus_int k, lag, start_idx, LSF_interpolation_flag; @@ -101,8 +102,41 @@ void silk_NSQ_sse4_1( opus_int32 tmp1; opus_int32 q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+#ifdef OPUS_CHECK_ASM + silk_nsq_state NSQ_c; + SideInfoIndices psIndices_c; + opus_int8 pulses_c[ MAX_FRAME_LENGTH ]; + const opus_int8 *const pulses_a = pulses; +#endif + SAVE_STACK;
+#ifdef OPUS_CHECK_ASM + ( void )pulses_a; + silk_memcpy( &NSQ_c, NSQ, sizeof( NSQ_c ) ); + silk_memcpy( &psIndices_c, psIndices, sizeof( psIndices_c ) ); + silk_assert( psEncC->nb_subfr * psEncC->subfr_length <= MAX_FRAME_LENGTH ); + silk_memcpy( pulses_c, pulses, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ); + + silk_NSQ_c( + psEncC, + &NSQ_c, + &psIndices_c, + x16, + pulses_c, + PredCoef_Q12, + LTPCoef_Q14, + AR_Q13, + HarmShapeGain_Q14, + Tilt_Q14, + LF_shp_Q14, + Gains_Q16, + pitchL, + Lambda_Q10, + LTP_scale_Q14 + ); +#endif + NSQ->rand_seed = psIndices->Seed;
/* Set unvoiced lag to the previous one, overwrite later for voiced */ @@ -172,8 +206,7 @@ void silk_NSQ_sse4_1( LSF_interpolation_flag = 1; }
- ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); /* Set up pointers to start of sub frame */ @@ -183,7 +216,7 @@ void silk_NSQ_sse4_1( for( k = 0; k < psEncC->nb_subfr; k++ ) { A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */ silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); @@ -209,12 +242,12 @@ void silk_NSQ_sse4_1( } }
- silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); + silk_nsq_scale_states_sse4_1( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) ) { silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], + AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, &(table[32]) ); } else @@ -224,7 +257,7 @@ void silk_NSQ_sse4_1( offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); }
- x_Q3 += psEncC->subfr_length; + x16 += psEncC->subfr_length; pulses += psEncC->subfr_length; pxq += psEncC->subfr_length; } @@ -235,12 +268,19 @@ void silk_NSQ_sse4_1( /* Save quantized speech and noise shaping signals */ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + +#ifdef OPUS_CHECK_ASM + silk_assert( !memcmp( &NSQ_c, NSQ, sizeof( NSQ_c ) ) ); + silk_assert( !memcmp( &psIndices_c, psIndices, sizeof( psIndices_c ) ) ); + silk_assert( !memcmp( pulses_c, pulses_a, psEncC->nb_subfr * psEncC->subfr_length * sizeof( pulses[0] ) ) ); +#endif + RESTORE_STACK; }
-/***********************************/ -/* silk_noise_shape_quantizer_10_16 */ -/***********************************/ +/************************************/ +/* silk_noise_shape_quantizer_10_16 */ +/************************************/ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ @@ -256,6 +296,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int Tilt_Q14, /* I Spectral tilt */ opus_int32 LF_shp_Q14, /* I */ opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int32 table[][4] /* I */ @@ -264,7 +305,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int i; opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; opus_int32 n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10, sDiff_shp_Q14; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
@@ -279,6 +320,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210; __m128i AR_shp_Q13_76543210;
+ int rdo_offset = (Lambda_Q10 >> 1) - 512; + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); @@ -288,6 +331,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14; xq_Q14 = psLPC_Q14[ 0 ]; + sDiff_shp_Q14 = NSQ->sDiff_shp_Q14; LTP_pred_Q13 = 0;
/* load a_Q12 */ @@ -430,8 +474,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 ); sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 );
- sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 ); - sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14), 0 ); + sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (sDiff_shp_Q14 >> 16), 0 ); + sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (sDiff_shp_Q14), 0 );
/* high part, use pmaddwd, results in 4 32-bit */ xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 ); @@ -462,14 +506,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 );
- silk_assert( lag > 0 || signalType != TYPE_VOICED ); + celt_assert( lag > 0 || signalType != TYPE_VOICED );
/* Combine prediction and noise shaping signals */ tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ if( lag > 0 ) { /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMULWB( silk_ADD_SAT32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); shp_lag_ptr++; @@ -495,6 +539,18 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( /* Find two quantization level candidates and measure their rate-distortion */ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if (Lambda_Q10 > 2048) { + /* For aggressive RDO, the bias becomes more than one pulse. */ + if (q1_Q10 > rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 ); + } else if (q1_Q10 < -rdo_offset) { + q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 ); + } else if (q1_Q10 < 0) { + q1_Q0 = -1; + } else { + q1_Q0 = 0; + } + }
q1_Q10 = table[q1_Q0][0]; q2_Q10 = table[q1_Q0][1]; @@ -519,7 +575,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( /* Update states */ psLPC_Q14++; *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); + NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 ); + sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 );
NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); @@ -600,64 +657,54 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( }
static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int16 x16[], /* I input */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ ) { opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26; + __m128i xmm_inv_gain_Q26, xmm_x16_x2x0, xmm_x16_x3x1;
lag = pitchL[ subfr ]; inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); silk_assert( inv_gain_Q31 != 0 );
- /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
- /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); + /* prepare inv_gain_Q26 in packed 4 32-bits */ + xmm_inv_gain_Q26 = _mm_set1_epi32(inv_gain_Q26);
for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); + xmm_x16_x2x0 = OP_CVTEPI16_EPI32_M64( &(x16[ i ] ) );
/* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + xmm_x16_x3x1 = _mm_shuffle_epi32( xmm_x16_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
- xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); + xmm_x16_x2x0 = _mm_mul_epi32( xmm_x16_x2x0, xmm_inv_gain_Q26 ); + xmm_x16_x3x1 = _mm_mul_epi32( xmm_x16_x3x1, xmm_inv_gain_Q26 );
- xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); + xmm_x16_x2x0 = _mm_srli_epi64( xmm_x16_x2x0, 16 ); + xmm_x16_x3x1 = _mm_slli_epi64( xmm_x16_x3x1, 16 );
- xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); + xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 ); + _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 ); }
for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 ); }
- /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ if( NSQ->rewhite_flag ) { if( subfr == 0 ) { @@ -671,7 +718,9 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( }
/* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + /* Scale long-term shaping state */ __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1;
@@ -707,6 +756,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( }
NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); + NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 );
/* Scale short-term prediction and shaping states */ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { @@ -715,5 +765,8 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1( for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; } } diff --git a/media/libopus/silk/x86/SigProc_FIX_sse.h b/media/libopus/silk/x86/SigProc_FIX_sse.h index 61efa8da4152d..e49d5d4ecc584 100644 --- a/media/libopus/silk/x86/SigProc_FIX_sse.h +++ b/media/libopus/silk/x86/SigProc_FIX_sse.h @@ -67,7 +67,7 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
#endif
-opus_int64 silk_inner_prod16_aligned_64_sse4_1( +opus_int64 silk_inner_prod16_sse4_1( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len @@ -76,18 +76,18 @@ opus_int64 silk_inner_prod16_aligned_64_sse4_1(
#if defined(OPUS_X86_PRESUME_SSE4_1)
-#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len)) +#define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
#else
-extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])( +extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len);
-# define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) +# define silk_inner_prod16(inVec1, inVec2, len, arch) \ + ((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
#endif #endif diff --git a/media/libopus/silk/x86/VAD_sse4_1.c b/media/libopus/silk/x86/VAD_sse4_1.c index d02ddf4ad0815..e7eaf9714a742 100644 --- a/media/libopus/silk/x86/VAD_sse4_1.c +++ b/media/libopus/silk/x86/VAD_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -63,6 +63,14 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s
SAVE_STACK;
+#ifdef OPUS_CHECK_ASM + silk_encoder_state psEncC_c; + opus_int ret_c; + + silk_memcpy( &psEncC_c, psEncC, sizeof( psEncC_c ) ); + ret_c = silk_VAD_GetSA_Q8_c( &psEncC_c, pIn ); +#endif + /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); @@ -233,15 +241,14 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); }
+ if( psEncC->frame_length == 20 * psEncC->fs_kHz ) { + speech_nrg = silk_RSHIFT32( speech_nrg, 1 ); + } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); - } + } else if( speech_nrg < 16384 ) { + speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
/* square-root */ speech_nrg = silk_SQRT_APPROX( speech_nrg ); @@ -272,6 +279,11 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); }
+#ifdef OPUS_CHECK_ASM + silk_assert( ret == ret_c ); + silk_assert( !memcmp( &psEncC_c, psEncC, sizeof( psEncC_c ) ) ); +#endif + RESTORE_STACK; return( ret ); } diff --git a/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c b/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c index 74d6c6d0ec6e4..2c7d18d05eab4 100644 --- a/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c +++ b/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang +/* Copyright (c) 2014-2020, Cisco Systems, INC + Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -38,105 +38,136 @@ /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ void silk_VQ_WMat_EC_sse4_1( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ) { opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; + opus_int32 neg_xX_Q24[ 5 ]; + opus_int32 sum1_Q15, sum2_Q24; + opus_int32 bits_res_Q8, bits_tot_Q8; + __m128i v_XX_31_Q17, v_XX_42_Q17, v_cb_row_31_Q7, v_cb_row_42_Q7, v_acc1_Q24, v_acc2_Q24; + + /* Negate and convert to new Q domain */ + neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 ); + neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 ); + neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 ); + neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 ); + neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 ); + + v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(&XX_Q17[ 1 ] ) ); + v_XX_42_Q17 = _mm_shuffle_epi32( v_XX_31_Q17, _MM_SHUFFLE( 0, 3, 2, 1 ) );
- __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5; /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; + *rate_dist_Q8 = silk_int32_MAX; + *res_nrg_Q15 = silk_int32_MAX; cb_row_Q7 = cb_Q7; + /* If things go really bad, at least *ind is set to something safe. */ + *ind = 0; for( k = 0; k < L; k++ ) { + opus_int32 penalty; gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - - C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] ); - C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); - C_tmp2 = _mm_slli_epi32( C_tmp2, 7 ); - C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 ); - - diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 ); - diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 ); - diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 ); - diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 ); - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); + /* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */ + sum1_Q15 = SILK_FIX_CONST( 1.001, 15 );
/* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) ); - C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp4 = _mm_srli_si128( C_tmp4, 2 ); - - C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - - C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp5 = _mm_srli_si128( C_tmp5, 2 ); - - C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 ); - C_tmp5 = _mm_slli_epi32( C_tmp5, 1 ); - - C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 ); - - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], 
diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); + penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 ); + + /* first row of XX_Q17 */ + v_cb_row_31_Q7 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); + v_cb_row_42_Q7 = _mm_shuffle_epi32( v_cb_row_31_Q7, _MM_SHUFFLE( 0, 3, 2, 1 ) ); + v_cb_row_31_Q7 = _mm_mul_epi32( v_XX_31_Q17, v_cb_row_31_Q7 ); + v_cb_row_42_Q7 = _mm_mul_epi32( v_XX_42_Q17, v_cb_row_42_Q7 ); + v_acc1_Q24 = _mm_add_epi64( v_cb_row_31_Q7, v_cb_row_42_Q7); + v_acc2_Q24 = _mm_shuffle_epi32( v_acc1_Q24, _MM_SHUFFLE( 1, 0, 3, 2 ) ); + v_acc1_Q24 = _mm_add_epi64( v_acc1_Q24, v_acc2_Q24); + sum2_Q24 = _mm_cvtsi128_si32( v_acc1_Q24 ); + sum2_Q24 = silk_ADD32( neg_xX_Q24[ 0 ], sum2_Q24 ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 0 ], cb_row_Q7[ 0 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 0 ] ); + + /* second row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 8 ], cb_row_Q7[ 3 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 9 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 6 ], cb_row_Q7[ 1 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 1 ] ); + + /* third row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 14 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 12 ], cb_row_Q7[ 2 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 2 ] ); + + /* fourth row of XX_Q17 */ + sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] ); + sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 18 ], cb_row_Q7[ 3 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 3 ] ); + + /* last row 
of XX_Q17 */ + sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 ); + sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 24 ], cb_row_Q7[ 4 ] ); + sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 4 ] );
/* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; + if( sum1_Q15 >= 0 ) { + /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */ + bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) ); + /* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */ + bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 ); + if( bits_tot_Q8 <= *rate_dist_Q8 ) { + *rate_dist_Q8 = bits_tot_Q8; + *res_nrg_Q15 = sum1_Q15 + penalty; + *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; + } }
/* Go to next cbk vector */ cb_row_Q7 += LTP_ORDER; } + +#ifdef OPUS_CHECK_ASM + { + opus_int8 ind_c = 0; + opus_int32 res_nrg_Q15_c = 0; + opus_int32 rate_dist_Q8_c = 0; + opus_int gain_Q7_c = 0; + + silk_VQ_WMat_EC_c( + &ind_c, + &res_nrg_Q15_c, + &rate_dist_Q8_c, + &gain_Q7_c, + XX_Q17, + xX_Q17, + cb_Q7, + cb_gain_Q7, + cl_Q5, + subfr_len, + max_gain_Q7, + L + ); + + silk_assert( *ind == ind_c ); + silk_assert( *res_nrg_Q15 == res_nrg_Q15_c ); + silk_assert( *rate_dist_Q8 == rate_dist_Q8_c ); + silk_assert( *gain_Q7 == gain_Q7_c ); + } +#endif } diff --git a/media/libopus/silk/x86/main_sse.h b/media/libopus/silk/x86/main_sse.h index 2f15d44869781..0a0391a2dab40 100644 --- a/media/libopus/silk/x86/main_sse.h +++ b/media/libopus/silk/x86/main_sse.h @@ -34,73 +34,72 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
-#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ # define OVERRIDE_silk_VQ_WMat_EC
void silk_VQ_WMat_EC_sse4_1( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ );
#if defined OPUS_X86_PRESUME_SSE4_1
-#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) +#define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L, arch) \ + ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L))
#else
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ );
-# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) +# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L, arch) \ + ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ + subfr_len, max_gain_Q7, L))
#endif -#endif
-#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ # define OVERRIDE_silk_NSQ
void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* 
I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
#if defined OPUS_X86_PRESUME_SSE4_1 @@ -113,21 +112,21 @@ void silk_NSQ_sse4_1( #else
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags 
*/ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ @@ -140,57 +139,56 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( # define OVERRIDE_silk_NSQ_del_dec
void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int 
Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
#if defined OPUS_X86_PRESUME_SSE4_1
-#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ +#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
#else
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I 
Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ );
-# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ +# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ + ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
#endif -#endif
void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ diff --git a/media/libopus/silk/x86/x86_silk_map.c b/media/libopus/silk/x86/x86_silk_map.c index 32dcc3cab7a93..ca13cde91e5b1 100644 --- a/media/libopus/silk/x86/x86_silk_map.c +++ b/media/libopus/silk/x86/x86_silk_map.c @@ -41,16 +41,16 @@
#include "fixed/main_FIX.h"
-opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( +opus_int64 (*const SILK_INNER_PROD16_IMPL[ OPUS_ARCHMASK + 1 ] )( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len ) = { - silk_inner_prod16_aligned_64_c, /* non-sse */ - silk_inner_prod16_aligned_64_c, - silk_inner_prod16_aligned_64_c, - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ + silk_inner_prod16_c, /* non-sse */ + silk_inner_prod16_c, + silk_inner_prod16_c, + MAY_HAVE_SSE4_1( silk_inner_prod16 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_inner_prod16 ) /* avx */ };
#endif @@ -66,23 +66,22 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ };
-#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes 
*/ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) = { silk_NSQ_c, /* non-sse */ silk_NSQ_c, @@ -90,21 +89,20 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; -#endif
-#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int32 *res_nrg_Q15, /* O best residual energy */ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int32 *XX_Q17, /* I correlation matrix */ + const opus_int32 *xX_Q17, /* I correlation vector */ const opus_int8 *cb_Q7, /* I codebook */ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int subfr_len, /* I number of samples per subframe */ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ + const opus_int L /* I number of vectors in codebook */ ) = { silk_VQ_WMat_EC_c, /* non-sse */ silk_VQ_WMat_EC_c, @@ -112,25 +110,23 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; -#endif
-#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int16 x16[], /* I Input */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization 
step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ ) = { silk_NSQ_del_dec_c, /* non-sse */ silk_NSQ_del_dec_c, @@ -138,7 +134,6 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; -#endif
#if defined(FIXED_POINT)
diff --git a/media/libopus/src/analysis.c b/media/libopus/src/analysis.c index 8ee57aa0b4212..058328f0fd513 100644 --- a/media/libopus/src/analysis.c +++ b/media/libopus/src/analysis.c @@ -31,7 +31,9 @@
#define ANALYSIS_C
+#ifdef MLP_TRAINING #include <stdio.h> +#endif
#include "mathops.h" #include "kiss_fft.h" @@ -249,6 +251,15 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int if (curr_lookahead<0) curr_lookahead += DETECT_SIZE;
+ tonal->read_subframe += len/(tonal->Fs/400); + while (tonal->read_subframe>=8) + { + tonal->read_subframe -= 8; + tonal->read_pos++; + } + if (tonal->read_pos>=DETECT_SIZE) + tonal->read_pos-=DETECT_SIZE; + /* On long frames, look at the second analysis window rather than the first. */ if (len > tonal->Fs/50 && pos != tonal->write_pos) { @@ -262,6 +273,8 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int pos = DETECT_SIZE-1; pos0 = pos; OPUS_COPY(info_out, &tonal->info[pos], 1); + if (!info_out->valid) + return; tonality_max = tonality_avg = info_out->tonality; tonality_count = 1; /* Look at the neighbouring frames and pick largest bandwidth found (to be safe). */ @@ -393,14 +406,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int info_out->music_prob_max = prob_max;
/* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */ - tonal->read_subframe += len/(tonal->Fs/400); - while (tonal->read_subframe>=8) - { - tonal->read_subframe -= 8; - tonal->read_pos++; - } - if (tonal->read_pos>=DETECT_SIZE) - tonal->read_pos-=DETECT_SIZE; }
static const float std_feature_bias[9] = { @@ -420,6 +425,24 @@ static const float std_feature_bias[9] = { #define SCALE_ENER(e) (e) #endif
+#ifdef FIXED_POINT +static int is_digital_silence32(const opus_val32* pcm, int frame_size, int channels, int lsb_depth) +{ + int silence = 0; + opus_val32 sample_max = 0; +#ifdef MLP_TRAINING + return 0; +#endif + sample_max = celt_maxabs32(pcm, frame_size*channels); + + silence = (sample_max == 0); + (void)lsb_depth; + return silence; +} +#else +#define is_digital_silence32(pcm, frame_size, channels, lsb_depth) is_digital_silence(pcm, frame_size, channels, lsb_depth) +#endif + static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; @@ -464,8 +487,14 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt float layer_out[MAX_NEURONS]; float below_max_pitch; float above_max_pitch; + int is_silence; SAVE_STACK;
+ if (!tonal->initialized) + { + tonal->mem_fill = 240; + tonal->initialized = 1; + } alpha = 1.f/IMIN(10, 1+tonal->count); alphaE = 1.f/IMIN(25, 1+tonal->count); /* Noise floor related decay for bandwidth detection: -2.2 dB/second */ @@ -483,8 +512,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt }
kfft = celt_mode->mdct.kfft[0]; - if (tonal->count==0) - tonal->mem_fill = 240; tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x, &tonal->inmem[tonal->mem_fill], tonal->downmix_state, IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs); @@ -500,6 +527,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt if (tonal->write_pos>=DETECT_SIZE) tonal->write_pos-=DETECT_SIZE;
+ is_silence = is_digital_silence32(tonal->inmem, ANALYSIS_BUF_SIZE, 1, lsb_depth); + ALLOC(in, 480, kiss_fft_cpx); ALLOC(out, 480, kiss_fft_cpx); ALLOC(tonality, 240, float); @@ -518,6 +547,16 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt &tonal->inmem[240], tonal->downmix_state, remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C, tonal->Fs); tonal->mem_fill = 240 + remaining; + if (is_silence) + { + /* On silence, copy the previous analysis. */ + int prev_pos = tonal->write_pos-2; + if (prev_pos < 0) + prev_pos += DETECT_SIZE; + OPUS_COPY(info, &tonal->info[prev_pos], 1); + RESTORE_STACK; + return; + } opus_fft(kfft, in, out, tonal->arch); #ifndef FIXED_POINT /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ @@ -654,7 +693,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt tonal->lowE[b] = logE[b]; tonal->highE[b] = MIN32(tonal->lowE[b]+15, tonal->highE[b]); } - relativeE += (logE[b]-tonal->lowE[b])/(1e-15f + (tonal->highE[b]-tonal->lowE[b])); + relativeE += (logE[b]-tonal->lowE[b])/(1e-5f + (tonal->highE[b]-tonal->lowE[b]));
L1=L2=0; for (i=0;i<NB_FRAMES;i++) @@ -896,9 +935,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
/* Probability of speech or music vs noise */ info->activity_probability = frame_probs[1]; - /* It seems like the RNN tends to have a bias towards speech and this - warping of the probabilities compensates for it. */ - info->music_prob = MAX16(1.f-10.f*(1.f-frame_probs[0]), MIN16(10.f*frame_probs[0], .12f+.69f*frame_probs[0]*(2.f-frame_probs[0]))); + info->music_prob = frame_probs[0];
/*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/ #ifdef MLP_TRAINING @@ -940,7 +977,6 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co analysis->analysis_offset -= frame_size; }
- analysis_info->valid = 0; tonality_get_info(analysis, analysis_info, frame_size); }
diff --git a/media/libopus/src/analysis.h b/media/libopus/src/analysis.h index 289c845e824a6..0b66555f2153b 100644 --- a/media/libopus/src/analysis.h +++ b/media/libopus/src/analysis.h @@ -74,6 +74,7 @@ typedef struct { int read_pos; int read_subframe; float hp_ener_accum; + int initialized; float rnn_state[MAX_NEURONS]; opus_val32 downmix_state[3]; AnalysisInfo info[DETECT_SIZE]; diff --git a/media/libopus/src/mapping_matrix.h b/media/libopus/src/mapping_matrix.h index 9c20483e7a4cf..98bc82df3eba5 100644 --- a/media/libopus/src/mapping_matrix.h +++ b/media/libopus/src/mapping_matrix.h @@ -53,7 +53,7 @@ opus_int32 mapping_matrix_get_size(int rows, int cols); opus_int16 *mapping_matrix_get_data(const MappingMatrix *matrix);
void mapping_matrix_init( - MappingMatrix * const st, + MappingMatrix * const matrix, int rows, int cols, int gain, diff --git a/media/libopus/src/mlp.c b/media/libopus/src/mlp.c index f43a704ec7894..964c6a98f6d43 100644 --- a/media/libopus/src/mlp.c +++ b/media/libopus/src/mlp.c @@ -69,22 +69,29 @@ static OPUS_INLINE float sigmoid_approx(float x) return .5f + .5f*tansig_approx(.5f*x); }
-void compute_dense(const DenseLayer *layer, float *output, const float *input) +static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x) { int i, j; + for (i=0;i<rows;i++) + { + for (j=0;j<cols;j++) + out[i] += weights[j*col_stride + i]*x[j]; + } +} + +void compute_dense(const DenseLayer *layer, float *output, const float *input) +{ + int i; int N, M; int stride; M = layer->nb_inputs; N = layer->nb_neurons; stride = N; for (i=0;i<N;i++) - { - /* Compute update gate. */ - float sum = layer->bias[i]; - for (j=0;j<M;j++) - sum += layer->input_weights[j*stride + i]*input[j]; - output[i] = WEIGHTS_SCALE*sum; - } + output[i] = layer->bias[i]; + gemm_accum(output, layer->input_weights, N, M, stride, input); + for (i=0;i<N;i++) + output[i] *= WEIGHTS_SCALE; if (layer->sigmoid) { for (i=0;i<N;i++) output[i] = sigmoid_approx(output[i]); @@ -96,45 +103,41 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input)
void compute_gru(const GRULayer *gru, float *state, const float *input) { - int i, j; + int i; int N, M; int stride; + float tmp[MAX_NEURONS]; float z[MAX_NEURONS]; float r[MAX_NEURONS]; float h[MAX_NEURONS]; M = gru->nb_inputs; N = gru->nb_neurons; stride = 3*N; + /* Compute update gate. */ for (i=0;i<N;i++) - { - /* Compute update gate. */ - float sum = gru->bias[i]; - for (j=0;j<M;j++) - sum += gru->input_weights[j*stride + i]*input[j]; - for (j=0;j<N;j++) - sum += gru->recurrent_weights[j*stride + i]*state[j]; - z[i] = sigmoid_approx(WEIGHTS_SCALE*sum); - } + z[i] = gru->bias[i]; + gemm_accum(z, gru->input_weights, N, M, stride, input); + gemm_accum(z, gru->recurrent_weights, N, N, stride, state); for (i=0;i<N;i++) - { - /* Compute reset gate. */ - float sum = gru->bias[N + i]; - for (j=0;j<M;j++) - sum += gru->input_weights[N + j*stride + i]*input[j]; - for (j=0;j<N;j++) - sum += gru->recurrent_weights[N + j*stride + i]*state[j]; - r[i] = sigmoid_approx(WEIGHTS_SCALE*sum); - } + z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]); + + /* Compute reset gate. */ for (i=0;i<N;i++) - { - /* Compute output. */ - float sum = gru->bias[2*N + i]; - for (j=0;j<M;j++) - sum += gru->input_weights[2*N + j*stride + i]*input[j]; - for (j=0;j<N;j++) - sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j]; - h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum); - } + r[i] = gru->bias[N + i]; + gemm_accum(r, &gru->input_weights[N], N, M, stride, input); + gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state); + for (i=0;i<N;i++) + r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]); + + /* Compute output. 
*/ + for (i=0;i<N;i++) + h[i] = gru->bias[2*N + i]; + for (i=0;i<N;i++) + tmp[i] = state[i] * r[i]; + gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input); + gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp); + for (i=0;i<N;i++) + h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]); for (i=0;i<N;i++) state[i] = h[i]; } diff --git a/media/libopus/src/mlp_data.c b/media/libopus/src/mlp_data.c index b4b3ecb28ea1d..ae4178df764eb 100644 --- a/media/libopus/src/mlp_data.c +++ b/media/libopus/src/mlp_data.c @@ -7,648 +7,648 @@ #include "mlp.h"
static const opus_int8 layer0_weights[800] = { - 21, -8, -5, 3, -16, 13, 3, -24, - -18, 14, 9, 2, 5, -2, 2, -3, - -5, 35, 10, 10, -8, 2, 1, 3, - -26, -61, 9, 4, -8, 18, 3, -5, - 47, -34, -6, 35, 35, 30, 40, -40, - 11, -39, 11, -14, 14, -24, -8, -8, - 2, -23, -100, 1, 19, -14, 4, 7, - 13, -26, 47, -4, 44, 5, 27, -1, - 57, -1, -36, 34, -20, -60, 25, -73, - 24, -70, 20, -4, -6, 68, -16, 13, - -24, 40, -42, -18, 0, 10, 12, -56, - -6, 38, 21, 5, -116, 19, 11, -14, - 3, 26, 14, 23, 1, 35, -16, -2, - -4, 55, 23, 16, 22, 45, -71, -27, - -20, 8, -47, -47, -46, -14, -50, -82, - 36, 45, 54, -32, -6, 14, -2, -24, - 3, 23, -27, -8, 1, 70, 15, -4, - -7, 21, 17, -126, 5, -47, -53, -6, - 34, 25, -1, 10, 17, -37, 38, -1, - 30, 25, -35, 112, -21, 8, -10, -3, - 15, 18, -14, -8, -20, 11, 24, 37, - -35, 50, 52, 16, 9, 15, -26, 43, - -3, -9, -26, 126, 7, 33, 55, -88, - 9, 28, 5, -19, -76, 32, 3, 11, - 10, -54, 2, -13, 3, 94, 52, 106, - 26, 74, 44, -99, 58, -44, 50, 15, - 29, 25, 50, -84, 28, 1, -30, 11, - 8, 20, -67, 4, 11, -25, -39, -33, - -83, -15, -9, 18, -2, -24, 18, 77, - -39, -80, 25, -8, 52, -88, -21, 81, - 29, -23, -31, 69, -28, 0, 12, 47, - 15, 20, 34, -53, 5, 111, -39, -7, - 1, 56, 6, 3, 7, -67, -6, -31, - 107, -6, 12, -97, 36, -18, -34, -6, - -9, -63, 10, -9, 3, 12, -80, -87, - 17, -9, 39, 126, 13, 15, 19, -6, - 29, 11, 10, -30, 26, -54, 31, -47, - 25, -40, -24, 23, 16, -27, -32, 30, - -22, -59, -9, 65, -86, -21, 120, -25, - -52, -12, 34, -50, 25, -17, 30, -4, - 95, 4, 9, 61, 25, -6, 11, -33, - -8, -3, 26, 37, -28, 19, -17, 36, - 11, -8, 8, -89, 2, -68, -95, 18, - 22, 36, 9, -8, -48, 54, -26, -6, - 30, -28, 10, -18, 2, -11, 7, 2, - 17, -5, 27, -21, 9, 15, 10, -18, - -10, 0, -9, 19, 10, -48, -35, -32, - 15, 24, 25, -6, 3, 51, -15, 9, - 19, -17, 6, 8, -9, 13, 14, -31, - -24, 10, -23, 21, 16, 18, -8, 35, - 18, -18, -18, -26, 25, 10, 55, 9, - 3, -24, -15, 2, -28, 20, -45, -14, - 15, -19, -8, 10, 6, 40, -31, -45, - 40, 53, 40, 27, -15, -31, -30, 10, - 38, 50, 28, 71, -21, 20, 1, 
23, - 22, 14, 5, -48, 39, 78, -7, -6, - 9, 50, 88, -15, 55, 36, 32, -22, - -60, 13, 31, -36, -71, -2, 52, 37, - 7, 46, -23, -43, 19, -49, -34, 15, - 60, -66, 36, 2, -57, -32, 64, -53, - -36, 49, -10, 21, -2, 23, -21, 22, - -3, -15, -6, 18, -12, -32, -11, -11, - -32, 24, 36, 37, -7, 30, -22, -12, - 22, -63, 26, 20, -16, -30, -13, -18, - -10, 45, 10, -24, -2, -26, 28, 18, - 36, 44, -40, -5, -14, -4, 18, -18, - -1, 18, -40, 15, 29, 15, 12, 3, - 25, -51, 8, -17, 9, -19, 22, -33, - -22, 39, 61, -15, 55, -24, 10, 32, - 12, 9, 29, 5, -33, -33, 5, 0, - 35, 105, -14, 39, 4, 43, 36, 52, - 50, -39, 45, -51, 27, 7, 65, -34, - 108, 127, 42, 26, 73, 19, 9, 17, - -62, 16, 11, 52, 85, -46, 10, -95, - 64, 53, 90, -8, -54, -5, -77, -45, - 127, -4, 54, -3, -19, 66, 40, -127, - 53, 22, -26, 24, 20, -36, -2, 101, - 1, 35, -1, -8, 5, -36, -16, 33, - 6, -73, -21, -23, -32, -21, -4, -46, - -42, -66, -16, -8, 0, -20, -21, 37, - -128, -128, -127, 126, -128, -127, 127, -3, - 127, 122, 127, 126, -127, -128, 127, -127, - 127, -124, 126, -126, -127, -128, 125, -127, - -38, -123, 125, -128, -128, 126, 125, 127, - -127, -123, -127, 127, -73, -113, 127, -17, - 127, 126, 127, 126, -126, -126, 127, -127, - 127, -125, 121, -126, -25, -127, 127, -126, - 26, -126, 71, -28, -128, 20, 20, 71, - -87, -93, 39, 116, 111, -85, 70, -26, - 62, 23, -15, 18, 4, 32, 114, -55, - 8, -116, 85, -67, -110, -49, 11, -5, - -128, -41, 61, 70, -29, 115, 127, 51, - 127, 127, 127, -128, -127, 127, -128, -127, - -126, -128, -126, -127, 127, 126, -128, 127, - -126, 125, -55, 127, 127, 127, -125, 115, - -128, -126, -128, -127, 127, -128, -128, 127, - 28, -127, -120, -127, 127, -127, -127, 9, - -26, -118, 73, 39, 124, 78, -126, 123, - 10, -127, -105, -64, 118, -84, -127, 61, - 77, 104, -128, -127, 94, -17, -28, 36 + -30, -9, 2, -12, 5, -1, 8, 9, + 9, 8, -13, 18, -17, -34, -5, 17, + -11, 0, -4, 10, 2, 10, 15, -8, + 2, -1, 0, 5, 13, -3, -16, 1, + -5, 3, 7, -28, -13, 6, 36, -3, + 19, -60, -17, -28, 7, -11, -30, -7, + 2, -42, -21, 
-3, 6, -22, 33, -9, + 7, -30, 21, -14, 24, -11, -20, -18, + -5, -12, 12, -49, -50, -49, 16, 9, + -37, -1, 9, 34, -13, -31, -31, 12, + 16, 44, -42, 2, -9, 8, -18, -6, + 9, 36, 19, 11, 13, 12, -21, 3, + -28, -12, 3, 33, 25, -14, 11, 1, + -94, -39, 18, -12, -11, -15, -7, 49, + 52, 10, -43, 9, 57, 8, 21, -6, + 14, -15, 44, -8, 7, -30, -13, -2, + -9, 25, -2, -127, 18, -11, -52, 26, + -27, 27, 10, -10, 7, 43, 6, -24, + 41, 10, -18, -27, 10, 17, 9, 10, + -17, -10, 20, -6, 22, 55, 35, -80, + 36, 25, -24, -36, 15, 9, -19, 88, + 19, 64, -51, -35, 17, 0, -7, 41, + -16, 27, 4, 15, -1, 18, -16, 47, + -39, -54, -8, 13, -25, -20, 102, -18, + -5, 44, 11, -28, 71, 2, -51, -5, + 5, 2, -83, -9, -29, 8, 21, -53, + 58, -37, -7, 13, 38, 9, 34, -1, + -41, 21, 4, -24, -36, -33, -21, 32, + 75, -2, 1, -68, -1, 47, -29, 32, + 20, 12, -65, -87, 5, 16, -12, 24, + 40, 15, 7, 19, -26, -17, 17, 6, + -2, -37, -30, -9, 32, -127, -39, 0, + -31, -27, 4, -22, 23, -6, -77, 35, + -61, 32, -37, -24, 13, -11, -1, -40, + -3, 17, -7, 13, 11, 59, -19, 10, + 6, -18, 0, 13, 3, -6, -23, 19, + 11, -17, 13, -1, -80, 40, -53, 69, + -29, -54, 0, -4, 33, -25, -2, 38, + 35, 36, -15, 46, 2, -13, -16, -8, + -8, 12, -24, -9, -55, -5, -9, 32, + 11, 7, 12, -18, -10, -86, -38, 54, + 37, -25, 18, -43, 7, -27, -27, -54, + 13, 9, 22, 70, 6, 35, -7, 23, + -15, -44, -6, 7, -66, -85, 32, 40, + -19, -9, -7, 12, -15, 7, 2, 6, + -35, 11, 28, 0, 26, 14, 1, 1, + 4, 12, 18, 35, 22, -18, -3, 14, + -1, 7, 14, -8, -14, -3, 4, -3, + -19, -7, -1, -25, -27, 25, -26, -2, + 33, -22, -27, -25, 4, -9, 7, 21, + 26, -30, 10, -9, -20, 11, 27, 10, + 5, -18, 14, -4, 2, -17, -5, -7, + -9, -13, 15, 29, 1, -10, -16, -10, + 35, 36, -7, -22, -44, 17, 30, 22, + 21, -1, 22, -11, 32, -8, -7, 5, + -10, 5, 30, -20, 29, -20, -34, 12, + -4, -6, 6, -13, 10, -5, -68, -1, + 24, 9, 19, -24, -64, 31, 19, 27, + -26, 75, -45, 41, 39, -42, 8, 6, + 23, -30, 16, -25, 30, 34, 8, -38, + -3, 18, 16, -31, 22, -4, -9, 1, + 20, 9, 38, -32, 0, -45, 0, -6, + -13, 11, -25, -32, 
-22, 31, -24, -11, + -11, -4, -4, 20, -34, 22, 20, 9, + -25, 27, -5, 28, -29, 29, 6, 21, + -6, -18, 54, 4, -46, 23, 21, -14, + -31, 36, -41, -24, 4, 22, 10, 11, + 7, 36, -32, -13, -52, -17, 24, 28, + -37, -36, -1, 24, 9, -38, 35, 48, + 18, 2, -1, 45, 10, 39, 24, -38, + 13, 8, -16, 8, 25, 11, 7, -29, + -11, 7, 20, -30, -38, -45, 14, -18, + -28, -9, 65, 61, 22, -53, -38, -16, + 36, 46, 20, -39, 32, -61, -6, -6, + -36, -33, -18, -28, 56, 101, 45, 11, + -28, -23, -29, -61, 20, -47, 2, 48, + 27, -17, 1, 40, 1, 3, -51, 15, + 35, 28, 22, 35, 53, -61, -29, 12, + -6, -21, 10, 3, -20, 2, -25, 1, + -6, 31, 11, -3, 1, -10, -52, 6, + 126, -105, 122, 127, -128, 127, 127, -128, + 127, 108, 12, 127, 48, -128, -36, -128, + 127, 127, -128, -128, 127, 89, -128, 127, + -128, -128, -128, 127, 127, -128, -128, -93, + -82, 20, 125, 65, -82, 127, 38, -74, + 81, 88, -88, 79, 51, -47, -111, -26, + 14, 83, -88, -112, 24, 35, -101, 98, + -99, -48, -45, 46, 83, -60, -79, 45, + -20, -41, 9, 4, 52, 54, 93, -10, + 4, 13, 3, 123, 6, 94, -111, -69, + -14, -31, 10, 12, 53, -79, -11, -21, + -2, -44, -72, 92, 65, -57, 56, -38, + 127, -56, -128, 127, 127, -128, 86, 117, + -75, -128, 127, -19, -99, -112, 127, -128, + 127, -48, 114, 118, -128, -128, 117, -17, + -6, 121, -128, 127, -128, 82, 54, -106, + 127, 127, -33, 100, -39, -23, 18, -78, + -34, -29, -1, -30, 127, -26, 127, -128, + 126, -128, 27, -23, -79, -120, -127, 127, + 72, 66, 29, 7, -66, -56, -117, -128 };
static const opus_int8 layer0_bias[32] = { - -39, 28, -7, -12, -36, -35, -49, 48, - 38, -17, 44, 15, -45, -18, -45, 9, - 11, 4, -25, 38, 12, -31, -90, -70, - -17, 27, 7, -73, 42, -103, 78, 99 + 51, -16, 1, 13, -5, -6, -16, -7, + 11, -6, 106, 26, 28, -14, 21, -29, + 7, 18, -18, -17, 21, -17, -9, 20, + -25, -3, -34, 48, 11, -13, -31, -20 };
static const opus_int8 layer1_weights[2304] = { - 29, -37, -21, -34, 19, -32, 44, -43, - 51, -5, -14, 37, -32, 74, 127, -29, - -75, -49, -5, -106, -64, 24, -11, 26, - 23, 25, -6, -1, -48, -16, -26, 29, - 24, -3, 50, -23, -45, -59, -11, -10, - -9, 51, 42, 16, -27, -79, 72, 7, - 55, -15, 5, -15, -24, -1, 48, -29, - -44, 26, -20, -40, 57, -15, 21, 45, - -4, -25, 13, 24, -26, 12, -2, 20, - 48, -60, 45, 37, 74, 27, 13, 0, - 126, 5, 68, 110, 7, 74, 51, 65, - -21, 32, 27, -30, 11, -3, -43, 73, - 31, 43, 119, -51, 13, -80, 32, -23, - 37, 69, 101, -33, -35, 50, -47, 126, - -84, 64, 88, 28, 57, 56, -28, -102, - 0, -15, -57, 1, -34, 25, 59, 36, - -11, 26, -42, 14, -4, -24, -37, 63, - -18, 2, -32, -9, -37, -15, -9, 9, - -41, -36, 105, 20, 14, -14, 64, 92, - 68, -83, -7, 20, 86, 22, 38, 8, - -63, -6, -13, -56, 61, 55, 50, -33, - -9, 22, -22, 77, 44, -33, 44, -20, - 77, 33, 34, -78, -53, 31, -108, 33, - -23, -54, 63, 34, -9, 43, -17, 15, - -15, 15, -3, -50, 15, -26, -6, -35, - -52, 57, 0, -10, 67, -57, -47, 36, - -64, 19, -19, -29, -97, -15, -49, 69, - -18, 84, 122, 4, -81, 18, -85, -4, - -40, 99, -46, 11, -10, -29, -51, -59, - 112, -103, 29, -45, 5, 43, 94, 24, - -4, -37, 27, -54, -24, 30, 43, 40, - -10, 13, 18, 75, 51, 11, -14, -7, - 34, 80, -119, 73, -47, -68, 50, 79, - 42, -2, -53, 22, -9, -21, -4, 88, - 97, -5, -28, 16, -64, -1, -25, 9, - -20, -63, 10, 57, -3, 127, -2, -26, - 8, 66, -48, -118, 47, -73, 15, 51, - -29, 28, 72, 41, -2, 48, 75, -24, - -47, 44, 8, -39, 70, -73, -44, 56, - -14, -62, 30, 74, 31, -98, 13, 8, - 83, -36, -7, -8, 62, 13, -29, -20, - 21, -66, 53, -49, -24, -14, 19, 101, - -47, -51, 65, -52, 25, -32, -38, 39, - -56, 41, 49, 22, 10, 21, 1, 38, - -18, 48, 36, 55, -39, -10, -4, 3, - 17, -27, -81, 61, 1, 8, -32, 35, - 29, 85, -33, -56, 16, 3, -48, 35, - 56, -40, 14, -65, 3, -70, -29, 84, - 51, 75, -52, 56, -55, 0, -13, -58, - 6, 3, 22, 31, -34, 20, -6, 26, - -121, 85, -75, 11, -34, -23, 46, -11, - 0, 9, 20, 39, 59, -12, 38, 14, - 6, -33, -15, -86, 39, -48, 
-5, 4, - 29, -28, 52, -8, -51, 52, 60, -11, - -22, 81, 81, 17, -127, -28, 27, -40, - -36, -57, 43, 52, 51, -38, -10, 23, - -44, -72, -63, 3, 20, -68, -72, -38, - 26, -45, 19, -75, -18, 4, 14, 15, - -47, 104, -9, 51, 25, 16, -95, 64, - -11, -2, -5, 39, 52, -40, 77, 44, - 11, -29, -37, 5, -58, -38, -28, 36, - 50, 2, -26, -3, -16, -11, -11, 16, - -14, -47, -3, -3, -48, 45, 40, -14, - 3, -67, -4, 19, -25, 14, -34, 48, - -66, 67, 70, 20, 52, -21, 21, 28, - 16, 70, -114, -11, 29, 9, -52, -52, - -39, 44, 17, 11, -15, -111, 84, -20, - -23, -60, 15, -39, 48, 10, -31, -24, - -38, 4, -42, -25, -70, -3, -75, -40, - -3, -19, 33, -14, 18, -14, -5, -16, - 46, -56, 38, -1, 2, 9, 17, -41, - -44, 11, -23, 28, -32, 8, -1, 57, - -5, 44, -64, -21, -54, 47, 22, -19, - -12, -24, -48, -4, -42, -46, -17, 52, - -39, -33, 79, -26, 20, 21, 9, -5, - -127, -31, 26, -86, -20, -115, 27, 33, - 33, 72, 30, 57, -17, 2, 71, 58, - -52, -10, 20, -79, -3, 40, -106, 17, - 34, 68, -17, 31, -27, 23, 17, 18, - 21, 25, 15, 41, -51, 30, -3, 1, - -4, -60, -13, -59, 53, -42, -34, -29, - -22, -50, 27, -70, -58, -21, -59, -27, - -28, 45, -66, 12, 85, -61, 0, -39, - -73, -75, -29, -42, -47, -88, -46, 27, - -43, 112, 83, -51, -36, -64, 13, 74, - -9, 2, 25, 38, -18, -64, -81, -23, - -12, 40, 18, -38, -121, -36, -6, -73, - -16, -59, 28, -26, -2, -69, -6, -7, - 43, -21, 61, 0, 1, 0, 13, 18, - -18, -21, -3, 18, 42, 0, 67, -35, - 39, 15, -97, -87, 103, 65, 86, 20, - -11, -17, 9, -9, 15, 15, -35, 52, - 34, -105, -85, 10, -36, -68, -64, 60, - -85, 51, -54, -39, -19, 77, 0, 35, - -20, 35, -78, 41, -11, 49, 14, 17, - -31, 1, 41, -1, 10, -25, -90, -12, - -9, -12, -26, 6, 34, 22, 31, 25, - 6, 27, -26, 5, -35, 29, 18, -12, - 54, -42, -22, -14, -6, 36, -14, -16, - 35, 69, 75, 7, -113, 44, -2, -18, - 3, -15, 50, -28, -36, -9, -25, 47, - 127, -6, -35, 35, -46, 17, 116, -62, - -17, -98, -105, 77, -99, -17, 41, 16, - -7, 17, -89, 37, -16, -72, -3, -29, - 50, 30, -43, -7, -72, 125, 51, 59, - -73, 37, 61, 2, -19, -11, 4, 46, - 33, 
51, 74, 88, -51, 68, 124, 27, - 97, -51, 16, -3, -6, -12, -30, 28, - 33, 3, -59, 22, 72, 53, 24, -29, - -4, -65, 89, -59, 92, 121, 12, 11, - 111, -20, 12, -85, -123, -33, -65, 70, - 68, 127, 2, -25, -104, 57, -74, -94, - -128, -27, 28, -7, -126, -123, -2, -57, - -57, 76, 55, -43, -44, -92, 1, 29, - 12, 10, 15, 10, 88, 0, -65, -57, - -66, 14, -10, -10, 36, 19, -49, -4, - 33, 3, 99, -49, -28, 41, 21, 37, - 46, 62, 16, 43, -58, 119, -32, 34, - 27, 13, -46, -1, -4, 23, 31, -21, - 108, 4, 107, 37, 26, -48, 70, 18, - 30, -18, -101, 77, -91, -117, -74, 16, - -116, 43, -24, 26, -19, -49, 34, -54, - 101, -14, 15, 12, 80, -9, 110, 42, - 8, -30, 53, -42, 34, -8, 60, -89, - 7, 41, 21, -94, 51, -1, -22, -37, - 22, 12, 49, -1, 55, 30, 5, -26, - -12, 4, -29, -16, -118, -12, -48, 85, - -44, -5, -27, -13, -84, -39, -63, -83, - 44, 5, -62, -52, -110, -8, -24, -36, - -22, -52, 20, -10, 42, 117, -19, -30, - 21, -16, -38, 92, 35, -5, -7, 45, - -69, -57, -69, 20, 18, 14, 107, -36, - -37, 43, 25, -38, -44, -104, 46, 8, - 93, -9, 54, 39, -48, -28, 21, 2, - 24, 11, -5, -14, -2, -10, 28, 105, - 5, -60, -65, -38, 121, 5, -33, -60, - 44, 68, 21, -69, -9, 7, 55, -26, - -75, 19, -76, 0, 10, -101, -56, -64, - 19, -68, 14, 19, 9, -49, 23, -1, - 19, -52, -15, -67, -63, -18, 24, -40, - -44, -11, -6, 43, 62, 67, -27, 5, - -57, 6, 25, -14, 19, 53, 24, -29, - -64, -26, -50, -19, 28, -15, -29, -56, - 6, -40, 35, 54, -6, -45, -17, 41, - 106, -42, -47, 43, -22, 20, -2, -126, - -29, 72, 85, -32, -30, 43, -6, 14, - 31, -84, 4, 7, 16, -47, -37, -33, - -14, -60, -52, -55, -44, 41, 39, -114, - -52, 24, -100, 55, -6, 40, 102, 48, - -1, 55, -55, -35, -27, 51, -1, 6, - -10, -16, -38, 29, 37, 55, 18, 22, - 28, -58, 13, 60, -60, 18, 6, -8, - 31, -52, 14, 2, 16, 30, -22, -39, - 0, 35, -29, 7, -48, 41, 78, 24, - 30, 1, -89, 72, 27, -33, -33, -12, - 61, -82, 123, 36, -12, -25, 55, 37, - 66, 27, -19, 37, 10, 44, -14, -17, - -20, 30, 20, 2, 21, 15, 37, 35, - 3, -40, 6, 12, 4, 22, -21, -79, - 29, 3, -55, -77, -31, 13, 
-19, -24, - 10, -38, 9, -78, 24, -50, -15, 31, - 30, -4, -33, 25, 27, 10, -24, 43, - -7, 78, -2, 27, 69, -23, 10, 79, - -19, -69, -3, 6, 25, 71, -42, -4, - -10, -21, -43, 18, 63, 6, 15, -41, - -7, 77, 37, -47, -7, 41, -34, 40, - 6, 25, 25, -74, -32, 4, 43, -26, - -8, 44, -5, 39, -4, -12, -9, -90, - -51, 38, 32, 42, 28, -42, 13, 4, - 30, 18, 54, -13, -11, 3, -2, 58, - 30, -53, 5, -76, 4, -14, -13, 16, - 10, -108, 6, -114, 28, -93, -65, -40, - 21, 28, -31, -85, -52, 29, 9, -10, - -12, -26, -27, -82, 43, 0, -75, -6, - 29, -4, 64, -12, -5, 25, 14, -2, - -54, -127, 81, -35, 14, -59, -75, 69, - -29, -65, 43, -88, -21, 34, -87, -48, - 51, 2, 8, -37, 25, 10, -25, -7, - -37, -39, -8, 28, 55, -72, -26, 16, - -30, 41, -49, 32, 37, -7, -12, 23, - 38, -61, -13, 28, 16, 4, -16, -122, - 37, 8, 17, 3, -79, 23, -17, 28, - -28, -31, -14, -39, 114, -49, -15, -47, - -14, 7, -7, -79, 98, -72, 19, -26, - 65, -44, -60, -56, 18, -20, -35, 19, - -72, 8, 78, -84, 40, -3, 46, 40, - 3, -31, 16, 38, -58, 48, 34, 0, - -75, -25, -12, -25, -5, -15, 18, -11, - 6, 34, 20, 21, -6, -19, -34, 22, - -18, -48, 69, 122, -16, 1, 12, 11, - 80, 31, 28, -34, 23, 27, -19, 10, - 63, 81, 66, 91, 10, 2, 123, 126, - 126, 74, 21, 75, 9, -15, 36, 29, - 34, -54, 101, -57, -51, -61, 47, 66, - 41, 38, -124, 56, -36, 48, 51, -7, - -11, -6, 30, -55, 48, 3, -18, -20, - 24, 24, 0, -55, 62, 3, -74, -49, - 2, -13, -31, -3, -12, 42, -33, 5, - -24, -39, 117, -81, -31, -39, 0, 2, - 6, 25, 77, 13, 6, 10, 26, -21, - -127, 35, 7, -26, -69, -19, -21, -25, - 111, -34, -3, 29, 6, -31, -44, 73, - -1, 6, -4, 2, 99, 25, 92, 34, - 40, 5, -94, 21, 47, 126, 61, -5, - 2, -37, 4, 21, -27, 32, -18, -10, - 70, -40, -15, 38, 19, 30, 12, -19, - -11, -2, -50, 22, -33, 3, 33, -39, - -19, 87, -67, -27, -11, -117, 36, 104, - 11, -56, -29, -6, 5, 14, -5, 44, - 38, 9, 24, 16, 127, 47, -6, -9, - 63, -58, -106, -19, 62, -30, 29, -23, - 69, 4, -31, 3, 14, -101, 5, 3, - 31, -6, -88, 28, 13, 0, 42, 0, - 6, 60, 54, -11, 5, -34, -33, -24, - -5, 42, 19, -63, 
10, 32, -9, -32, - 25, 26, -28, -5, -7, -45, 32, 11, - -12, 31, -26, -33, -46, -76, 40, -5, - 3, -5, 3, -18, -12, 93, 17, 62, - -121, -14, 42, 76, 24, 4, 34, 14, - 41, 18, -10, 93, 91, -62, 58, -55, - 88, -64, -9, 23, 25, 45, -2, 70, - 43, 9, 103, 22, 48, 46, -13, 9, - -27, -37, 35, -51, -54, -10, 57, -35, - -2, -1, 7, -3, -37, 5, 9, 3, - 27, -7, 50, 5, -50, 22, 21, -13, - 22, 43, 10, -18, 37, -20, -38, -32, - -2, 27, 1, -67, -41, 60, -28, -33, - -50, -38, 76, -18, 23, -16, -25, 0, - -8, -71, -10, -12, 20, -34, 40, -19, - 12, -42, 14, 11, -28, 37, -33, 4, - 41, 19, 65, 85, -26, 23, -50, 38, - 3, -34, 64, 43, 1, 16, 34, 35, - -15, -27, 20, -1, 50, 0, -30, -50, - 14, -60, -69, 10, -57, -45, -32, -14, - 32, -16, 46, -61, -46, 1, -40, -75, - -28, -16, -73, -50, -13, -14, 9, -78, - 5, -44, -9, -25, -79, -16, -46, 92, - -70, 61, -20, 12, -43, 3, -19, 40, - 11, 25, 32, -58, 58, 45, 28, 1, - 13, -27, -53, 30, -30, -31, -52, 10, - 17, 68, -6, -44, -8, 25, 33, 48, - 81, -43, -10, 67, 29, 23, 11, 20, - -39, -28, 31, 18, -72, 29, 22, -11, - -71, 39, -10, -121, -20, -8, -40, 125, - -7, -43, 10, 49, 12, 38, 43, -79, - -72, -41, -48, -60, -41, 5, -17, 35, - 95, 35, 124, -11, 2, 70, -60, -25, - 0, -27, 76, -19, -94, 40, -96, 65, - -9, -31, -8, -35, 27, -44, 64, -60, - -12, -72, 12, -56, 8, -45, -5, -27, - -6, 50, 30, -20, -18, 64, -108, -15, - -43, 44, 11, -14, -8, -21, -24, 42, - 51, 47, -3, 17, -17, -24, 33, -28, - 25, -15, -33, -21, 6, -13, 39, 19, - 7, 116, 37, 53, 105, 4, 15, -56, - 10, 12, -92, -30, 117, 4, 32, -13, - -17, -21, 43, 29, -25, -38, 51, 32, - 74, -41, -15, 29, -5, -114, 35, -36, - 46, 51, 15, 16, -39, 24, 17, 3, - -26, 40, -37, 34, 43, 20, -61, -14, - 31, -29, 34, 25, -22, 25, -39, 39, - -33, -10, -56, -61, -6, -48, -114, -96, - -12, 3, 82, 45, 8, -2, -4, -28, - -42, -58, -50, -34, -54, -26, -64, -16, - -82, 49, -28, 0, -30, -20, -64, -68, - -18, 18, -44, -34, -42, -61, -17, 14, - -28, 8, 27, -49, -18, 45, -41, 11, - -2, 10, -8, -17, -24, -28, -42, 12, - 79, 46, 
30, -26, 5, 3, 3, 58, - 12, -73, 23, 17, 5, 2, 20, 36, - 56, -33, 80, 71, 17, 87, 40, -21, - 26, 6, 48, -71, 76, 15, -47, 32, - 87, 30, 58, -11, 65, -43, 91, 54, - -31, 8, 34, 25, -14, 37, -30, 20, - -35, 4, -75, 56, -29, 22, 64, 48, - 47, -78, -74, 22, 11, -62, -28, 62, - -30, 12, -25, -31, 41, -42, 22, 23, - -8, -20, 38, 21, -6, 52, 23, 5, - -20, 32, 3, 16, 26, 50, 3, -4, - 48, -77, -3, -4, 21, 23, 30, 11, - -1, 9, -56, -100, 39, 5, -25, 35, - 95, 44, 22, 75, 19, -20, 126, -31, - -8, -24, 37, 35, -32, -4, 20, 47, - 7, -84, 2, 10, 7, 7, 75, -64, - 46, 36, -77, -1, -38, -19, -52, 39, - 26, 41, 82, 38, 67, 62, -6, -25, - -16, -35, -5, -14, 32, 15, -3, -38, - 28, 43, -59, 7, 58, 26, -63, -56 + 22, -1, -7, 7, 29, -27, -31, -17, + -13, 33, 44, -8, 11, 33, 24, 78, + 15, 19, 30, -2, -24, 5, 49, 5, + 36, 29, -14, -11, -48, -33, 21, -42, + -38, -12, 55, -37, 54, -8, 1, 36, + 17, 0, 51, 31, 59, 7, -12, 53, + 4, 32, -14, 48, 5, -10, -16, -8, + 1, -16, -56, -24, -6, 18, -2, 23, + 6, 46, -6, -10, 20, 35, -44, -15, + -49, 36, 16, 5, -7, -79, -67, 12, + 70, -3, -79, -54, -85, -24, 47, -22, + 33, 21, 69, -1, 11, 22, 14, -16, + -16, -22, -28, -11, 11, -41, 31, -26, + -33, -19, -4, 27, 32, -50, 5, -10, + -38, -22, -8, 35, -31, 1, -41, -15, + -11, 44, 28, -17, -41, -23, 17, 2, + -23, -26, -13, -13, -17, 6, 14, -31, + -25, 9, -19, 39, -8, 4, 31, -1, + -45, -11, -28, -92, -46, -15, 21, 118, + -22, 45, -51, 11, -20, -20, -15, 13, + -21, -97, -29, -32, -23, -42, 94, 1, + 23, -8, 63, -3, -46, 19, -26, 32, + -40, -74, -26, 26, -4, -13, 30, -20, + -30, -25, -14, -31, -45, -43, 4, -60, + -48, -12, -34, 2, 2, 3, 13, 15, + 11, 16, 5, 46, -9, -55, -16, -57, + 29, 14, 38, -50, -2, -44, -11, -8, + 52, -27, -38, -7, 20, 47, 17, -59, + 0, 47, 46, -63, 35, -17, 19, 33, + 68, -19, 2, 15, -16, 28, -16, -103, + 26, -35, 47, -39, -60, 30, 31, -23, + -52, -13, 116, 47, -25, 30, 40, 30, + -22, 2, 12, -27, -18, 31, -10, 27, + -8, -66, 12, 14, 4, -26, -28, -13, + 3, 13, -26, -51, 37, 5, 2, -21, + 47, 3, 
13, 25, -41, -27, -8, -4, + 5, -76, -33, 28, 10, 9, -46, -74, + 19, 28, 25, 31, 54, -55, 68, 38, + -24, -32, 2, 4, 68, 11, -1, 99, + 5, 16, -2, -74, 40, 26, -26, 33, + 31, -1, -68, 14, -6, 25, 9, 29, + 60, 61, 7, -7, 0, -24, 7, 77, + 4, -1, 16, -7, 13, -15, -19, 28, + -31, -24, -16, 37, 24, 13, 30, 10, + -30, 11, 11, -10, 22, 60, 28, 45, + -3, -40, -62, -5, -102, 9, -32, -27, + -54, 21, 15, -5, 37, -43, -11, 37, + -19, 47, -64, -128, -27, -114, 21, -66, + 59, 46, -3, -12, -87, -9, 4, 19, + -113, -36, 78, 57, -26, -38, -77, -10, + 6, 6, -75, 25, -97, -11, 33, -46, + 1, 13, -21, -33, -20, 16, -6, -3, + -11, -4, -27, 38, 8, -41, -2, -33, + 18, 19, -26, 1, -29, -22, -4, -14, + -55, -11, -80, -3, 11, 34, 90, 51, + 11, 17, 43, 36, 127, -32, 29, 103, + 9, 27, 13, 64, 56, 70, -14, 3, + -12, 10, 37, 3, 12, -22, -10, 46, + 28, 10, 20, 26, -24, 18, 9, 7, + 14, 34, -5, -7, 31, -14, -56, 11, + -18, -8, -17, -7, -10, -40, 10, -33, + -32, -43, 5, 9, 11, -4, 10, 50, + -12, -5, 46, 9, 7, 1, 11, 15, + 91, -17, 7, -50, 23, 6, -30, -99, + 0, -17, 14, 8, -10, -25, -30, -69, + -62, 31, 127, 114, -23, 101, -5, -54, + -6, -22, 7, -56, 39, 18, -29, 0, + 46, 8, -79, 4, -21, 18, -32, 62, + -12, -8, -12, -58, 31, -32, 17, 6, + -24, 25, 24, 9, -4, -19, 45, 6, + 17, -14, 5, -27, 16, -4, -41, 25, + -36, 5, 15, 12, 50, 27, 25, 23, + -44, -69, -9, -19, -48, -8, 4, 12, + -6, 13, -19, -30, -36, 26, 37, -1, + -3, -30, -42, -14, -10, -20, 26, -54, + -27, -44, 4, 73, -26, 90, 32, -69, + -29, -16, 3, 103, 15, -17, 37, 24, + -23, -31, 33, -37, -64, 25, 13, -81, + -28, -32, 27, 5, -35, -23, 15, -22, + 19, -7, 9, 30, 19, -23, 27, -13, + 43, 29, -29, -6, 9, -40, -33, -33, + -32, 9, 11, -48, -8, -23, -52, 46, + 17, -22, -42, 35, -15, -41, 16, 34, + 31, -42, -19, -11, 55, 7, -39, 89, + -11, -33, 20, -14, 22, 32, 3, -17, + -6, 14, 34, 1, 55, -21, -90, -8, + 18, 27, 13, -29, 21, 15, -33, -51, + -9, -11, 4, -16, -18, 23, -4, -4, + 48, 1, 7, 29, -14, -12, -16, 17, + 35, 8, 0, -7, -2, 9, 8, 17, + -6, 53, -32, -21, 
-50, 5, 99, -60, + -5, -53, 10, -31, 12, -5, 7, 80, + 36, 18, -31, 9, 98, 36, -63, -35, + 4, -13, -28, -24, 28, -13, 18, 16, + -1, -18, -34, 10, 20, 7, 4, 29, + 11, 25, -7, 36, 14, 45, 24, 1, + -16, 30, 6, 35, -6, -11, -24, 13, + -1, 27, 39, 20, 48, -11, -4, -13, + 28, 11, -31, -18, 31, -29, 22, -2, + -20, -16, 5, 30, -12, -28, -3, 93, + -16, 23, 18, -29, 6, -54, -37, 28, + -3, -3, -47, -3, -36, -55, -3, 41, + -10, 47, -2, 23, 42, -7, -71, -27, + 83, -64, 7, -24, 8, 26, -17, 15, + 12, 31, -30, -38, -13, -33, -56, 4, + -17, 20, 18, 1, -30, -5, -6, -31, + -14, -37, 0, 22, 10, -30, 37, -17, + 18, 6, 5, 23, -36, -32, 14, 18, + -13, -61, -52, -69, 44, -30, 16, 18, + -4, -25, 14, 81, 26, -8, -23, -59, + 52, -104, 17, 119, -32, 26, 17, 1, + 23, 45, 29, -64, -57, -14, 73, 21, + -13, -13, 9, -68, -7, -52, 3, 24, + -39, 44, -15, 27, 14, 19, -9, -28, + -11, 5, 3, -34, -2, 2, 22, -6, + -23, 4, 3, 13, -22, -13, -10, -18, + 29, 6, 44, -13, -24, -8, 2, 30, + 14, 43, 6, 17, -73, -6, -7, 20, + -80, -7, -7, -28, 15, -69, -38, -5, + -100, -35, 15, -79, 23, 29, -18, -27, + 21, -66, -37, 8, -22, -39, 48, 4, + -13, 1, -9, 11, -29, 22, 6, -49, + 32, -14, 47, -18, -4, 44, -52, -74, + 43, 30, 23, -14, 5, 0, -27, 4, + -7, 10, -4, 10, 1, -16, 11, -18, + -2, -5, 2, -11, 0, -20, -4, 38, + 74, 59, 39, 64, -10, 26, -3, -40, + -68, 3, -30, -51, 8, -19, -27, -46, + 51, 52, 54, 36, 90, 92, 14, 13, + -5, 0, 16, -62, 16, 11, -47, -37, + -6, -5, 21, 54, -57, 32, 42, -6, + 62, -9, 16, 21, 24, 9, -10, -4, + 33, 50, 13, -15, 1, -35, -48, 18, + -11, -17, -67, -13, 21, 38, -44, 36, + -16, 29, 17, 5, -10, 18, 17, -32, + 2, 8, 22, -56, -15, -32, 40, 43, + 19, 46, -7, -100, -96, 19, 53, 24, + 21, -26, -48, -101, -82, 61, 38, -85, + -28, -34, -1, 63, -5, -5, 39, 39, + -38, 32, -12, -28, 20, 40, -8, 2, + 31, 12, -35, -13, 20, -25, 30, 8, + 3, -13, -9, -20, 2, -13, 24, 37, + -10, 33, 6, 20, -16, -24, -6, -6, + -19, -5, 22, 21, 10, 11, -4, -39, + -1, 6, 49, 41, -15, -57, 21, -62, + 77, -69, -13, 0, -74, 1, -7, 
-38, + -8, 6, 63, 28, 4, 26, -52, 82, + 63, 13, 45, -33, 44, -52, -65, -21, + -46, -49, 64, -17, 32, 24, 68, -39, + -16, -5, -26, 28, 5, -61, -28, 2, + 24, 11, -12, -33, 9, -37, -3, -28, + 22, -37, -12, 19, 0, -18, -2, 14, + 1, 4, 8, -9, -2, 43, -17, -2, + -66, -31, 56, -40, -87, -36, -2, -4, + -42, -45, -1, 31, -43, -15, 27, 63, + -11, 32, -10, -33, 27, -19, 4, 15, + -26, -34, 29, -4, -39, -65, 14, -20, + -21, -17, -36, 13, 59, 47, -38, -33, + 13, -37, -8, -37, -7, -6, -76, -31, + -12, -46, 7, 24, -21, -30, -14, 9, + 15, -12, -13, 47, -27, -25, -1, -39, + 0, 20, -9, 6, 7, 4, 3, 7, + 39, 50, 22, -7, 14, -20, 1, 70, + -28, 29, -41, 10, -16, -5, -28, -2, + -37, 32, -18, 17, 62, -11, -20, -50, + 36, 21, -62, -12, -56, 52, 50, 17, + 3, 48, 44, -41, -25, 3, 16, -3, + 0, 33, -6, 15, 27, 34, -25, 22, + 9, 17, -11, 36, 16, -2, 12, 21, + -52, 45, -2, -10, 46, 21, -18, 67, + -28, -13, 30, 37, 42, 16, -9, 11, + 75, 7, -64, -40, -10, 29, 57, -23, + 5, 53, -77, 3, -17, -5, 47, -55, + -35, -36, -13, 52, -53, -71, 52, -111, + -23, -26, -28, 29, -43, 55, -19, 43, + -19, 54, -12, -33, -44, -39, -19, -10, + -31, -10, 21, 38, -57, -20, 2, -25, + 8, -6, 50, 12, 15, 25, -25, 15, + -30, -6, 9, 25, 37, 19, -4, 31, + -22, 2, 4, 2, 36, 7, 3, -34, + -80, 36, -10, -2, -5, 31, -36, 49, + -70, 20, -36, 21, 24, 25, -46, -51, + 36, -58, -48, -40, -10, 55, 71, 47, + 10, -1, 1, 2, -46, -68, 16, 13, + 0, -74, -29, 73, -52, -18, -11, 7, + -44, -82, -32, -70, -28, -1, -39, -68, + -6, -41, 12, -22, -16, 40, -11, -25, + 51, -9, 21, 4, 4, -34, 7, -78, + 16, 6, -38, -30, -2, -44, 32, 0, + 22, 64, 5, -72, -2, -14, -10, -16, + -8, -25, 12, 102, -58, 37, -10, -23, + 15, 49, 7, -7, 2, -20, -32, 45, + -6, 48, 28, 30, 33, -1, 22, -6, + 30, 65, -17, 29, 74, 37, -26, -10, + 15, -24, 19, -66, 22, -10, -31, -1, + -18, -9, 11, 37, -4, 45, 5, 41, + 17, 1, 1, 24, -58, 41, 5, -51, + 14, 8, 43, 16, -10, -1, 45, 32, + -64, 3, -33, -25, -3, -27, -68, 12, + 23, -11, -13, -37, -40, 4, -21, -12, + 32, -23, -19, 76, 41, -23, 
-24, -44, + -65, -1, -15, 1, 71, 63, 5, 20, + -3, 21, -23, 31, -32, 18, -2, 27, + 31, 46, -5, -39, -5, -35, 18, -18, + -40, -10, 3, 12, 2, -2, -22, 40, + 5, -6, 60, 36, 3, 29, -27, 10, + 25, -54, 5, 26, 39, 35, -24, -37, + 30, -91, 28, -4, -21, -27, -39, -6, + 5, 12, -128, 38, -16, 29, -95, -29, + 82, -2, 35, 2, 12, 8, -22, 10, + 80, -47, 2, -25, -73, -79, 16, -30, + -32, -66, 48, 21, -45, -11, -47, 14, + -27, -17, -7, 15, -44, -14, -44, -26, + -32, 26, -23, 17, -7, -28, 26, -6, + 28, 6, -26, 2, 13, -14, -23, -14, + 19, 46, 16, 2, -33, -21, 28, -17, + -42, 44, -37, 1, -39, 28, 84, -46, + 15, 10, 13, -44, 72, -26, 26, 32, + -28, -12, -83, 2, 10, -30, -44, -10, + -28, 53, 45, 65, 0, -25, 57, 36, + -33, 6, 29, 44, -53, 11, 19, -2, + -27, 35, 32, 49, 4, 23, 38, 36, + 24, 10, 51, -39, 4, -7, 26, 37, + -35, 11, -47, -18, 28, 16, -35, 42, + 17, -21, -41, 28, 14, -12, 11, -45, + 7, -43, -15, 18, -5, 38, -40, -50, + -30, -21, 9, -98, 13, 12, 23, 75, + -56, -7, -3, -4, -1, -34, 12, -49, + 11, 26, -18, -28, -17, 33, 13, -14, + 40, 24, -72, -37, 10, 17, -6, 22, + 16, 16, -6, -12, -30, -14, 10, 40, + -23, 12, 15, -3, -15, 13, -56, -4, + -30, 1, -3, -17, 27, 50, -5, 64, + -36, -19, 7, 29, 22, 25, 9, -16, + -58, -69, -40, -61, -71, -14, 42, 93, + 26, 11, -6, -58, -11, 70, -52, 19, + 9, -30, -33, 11, -37, -47, -21, -22, + -40, 10, 47, 4, -23, 17, 48, 41, + -48, 14, 10, 15, 34, -23, -2, -47, + 23, -32, -13, -10, -26, -26, -4, 16, + 38, -14, 0, -12, -7, -7, 20, 44, + -1, -32, -27, -16, 4, -6, -18, 14, + 5, 4, -29, 28, 7, -7, 15, -11, + -20, -45, -36, 16, 84, 34, -59, -30, + 22, 126, 8, 68, 79, -17, 21, -68, + 37, 5, 15, 63, 49, 127, -90, 85, + 43, 7, 16, 9, 6, -45, -57, -43, + 57, 11, -23, -11, -29, 60, -26, 0, + 7, 42, -24, 10, 23, -25, 8, -7, + -40, 19, -17, 35, 4, 27, -39, -91, + 27, -36, 34, 2, 16, -24, 25, 7, + -21, 5, 17, 10, -22, -30, 9, -17, + -61, -26, 33, 21, 58, -51, -14, 69, + -38, 20, 7, 80, -4, -65, -6, -27, + 53, -12, 47, -1, -15, 1, 60, 102, + -79, -4, 12, 9, 22, 37, 
-8, -4, + 37, 2, -3, -15, -16, -11, -5, 19, + -6, -43, 20, -25, -18, 10, -27, 0, + -28, -27, -11, 10, -18, -2, -4, -16, + 26, 14, -6, 7, -6, 1, 53, -2, + -29, 23, 9, -30, -6, -4, -6, 56, + 70, 0, -33, -20, -17, -9, -24, 46, + -5, -105, 47, -46, -51, 20, 20, -53, + -81, -1, -7, 75, -5, -21, -65, 12, + -52, 22, -50, -12, 49, 54, 76, -81, + 10, 45, -41, -59, 18, -19, 25, 14, + -31, -53, -5, 12, 31, 84, -23, 2, + 7, 2, 10, -32, 39, -2, -12, 1, + -9, 0, -10, -11, 9, 15, -8, -2, + 2, -1, 10, 14, -5, -40, 19, -7, + -7, 26, -4, 2, 1, -27, 35, 32, + 21, -31, 26, 43, -9, 4, -32, 40, + -62, -52, 36, 22, 38, 22, 36, -96, + 6, -10, -23, -49, 15, -33, -18, -3, + 0, 41, 21, -19, 21, 23, -39, -23, + -6, 6, 47, 56, 4, 74, 0, -98, + 29, -47, -14, -36, 21, -22, 22, 16, + 13, 12, 16, -5, 13, 17, -13, -15, + 1, -34, -26, 26, 12, 32, 27, 13, + -67, 27, 2, 8, 10, 18, 16, 20, + -17, -17, 57, -64, 5, 14, 19, 31, + -18, -44, -46, -16, 4, -25, 17, -126, + -24, 39, 4, 8, 55, -25, -34, 39, + -16, 3, 9, 71, 72, -31, -55, 6, + 10, -25, 32, -85, -21, 18, -8, 15, + 12, -27, -7, 1, -21, -2, -5, 48, + -16, 18, 1, -22, -26, 16, 14, -31, + 27, -6, -15, -21, 4, -14, 18, -36 };
static const opus_int8 layer1_recur_weights[1728] = {};
static const opus_int8 layer1_bias[72] = { - 47, 0, -35, 34, 104, 95, 120, -3, - -5, 105, -41, 79, -27, 122, 46, 89, - 89, 34, 71, 94, 70, 68, 79, 18, - 44, 25, -15, -22, 18, -22, 9, -14, - -45, 5, 1, 20, 38, 22, 15, 24, - 28, -14, -2, 10, 8, -27, -18, -46, - -5, 34, -43, -33, 12, 13, 0, 0, - 9, -47, 28, 14, -18, 17, 8, 10, - -38, -23, -20, -11, 59, 45, 76, 24 + -42, 20, 16, 0, 105, 60, 1, -97, + 24, 60, 18, 13, 62, 25, 127, 34, + 79, 55, 118, 127, 95, 31, -4, 87, + 21, 12, 2, -14, 18, 23, 8, 17, + -1, -8, 5, 4, 24, 37, 21, 13, + 36, 13, 17, 18, 37, 30, 33, 1, + 8, -16, -11, -5, -31, -3, -5, 0, + 6, 3, 58, -7, -1, -16, 5, -13, + 16, 10, -2, -14, 11, -4, 3, -11 };
static const opus_int8 layer2_weights[48] = { - 122, -51, 118, -67, -23, -128, 126, -124, - 127, 12, 2, -68, 117, -80, 3, 127, - 71, 127, -128, -83, -15, 93, 13, 40, - 27, -127, 65, 101, 84, 16, 85, 117, - 127, -120, -59, -55, -128, -51, -128, -65, - 127, 1, 99, 127, -60, 127, -128, 50 + -113, -88, 31, -128, -126, -61, 85, -35, + 118, -128, -61, 127, -128, -17, -128, 127, + 104, -9, -128, 33, 45, 127, 5, 83, + 84, -128, -85, -128, -45, 48, -53, -128, + 46, 127, -17, 125, 117, -41, -117, -91, + -127, -68, -1, -89, -80, 32, 106, 7 };
static const opus_int8 layer2_bias[2] = { - 34, 115 + 14, 117 };
const DenseLayer layer0 = { diff --git a/media/libopus/src/opus.c b/media/libopus/src/opus.c index cdbd13a11cd78..538b5ea74ebee 100644 --- a/media/libopus/src/opus.c +++ b/media/libopus/src/opus.c @@ -252,7 +252,7 @@ int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, /* Number of frames encoded in bits 0 to 5 */ ch = *data++; count = ch&0x3F; - if (count <= 0 || framesize*count > 5760) + if (count <= 0 || framesize*(opus_int32)count > 5760) return OPUS_INVALID_PACKET; len--; /* Padding flag is bit 6 */ diff --git a/media/libopus/src/opus_encoder.c b/media/libopus/src/opus_encoder.c index 1c5a8b3387356..253fe9e880b70 100644 --- a/media/libopus/src/opus_encoder.c +++ b/media/libopus/src/opus_encoder.c @@ -112,7 +112,7 @@ struct OpusEncoder { opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef DISABLE_FLOAT_API int detected_bandwidth; - int nb_no_activity_frames; + int nb_no_activity_ms_Q1; opus_val32 peak_signal_energy; #endif int nonfinal_frame; /* current frame is not the final in a packet */ @@ -837,7 +837,7 @@ static opus_int32 compute_equiv_rate(opus_int32 bitrate, int channels,
#ifndef DISABLE_FLOAT_API
-static int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth) +int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth) { int silence = 0; opus_val32 sample_max = 0; @@ -892,44 +892,29 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in #endif
/* Decides if DTX should be turned on (=1) or off (=0) */ -static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */ - int *nb_no_activity_frames, /* number of consecutive frames with no activity */ - opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */ - const opus_val16 *pcm, /* input pcm signal */ - int frame_size, /* frame size */ - int channels, - int is_silence, /* only digital silence detected in this frame */ - int arch - ) -{ - opus_val32 noise_energy; - - if (!is_silence) - { - if (activity_probability < DTX_ACTIVITY_THRESHOLD) /* is noise */ - { - noise_energy = compute_frame_energy(pcm, frame_size, channels, arch); +static int decide_dtx_mode(opus_int activity, /* indicates if this frame contains speech/music */ + int *nb_no_activity_ms_Q1, /* number of consecutive milliseconds with no activity, in Q1 */ + int frame_size_ms_Q1 /* number of milliseconds in this update, in Q1 */ + )
- /* but is sufficiently quiet */ - is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy); - } - } - - if (is_silence) +{ + if (!activity) { - /* The number of consecutive DTX frames should be within the allowed bounds */ - (*nb_no_activity_frames)++; - - if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX) + /* The number of consecutive DTX frames should be within the allowed bounds. + Note that the allowed bound is defined in the SILK headers and assumes 20 ms + frames. As this function can be called with any frame length, a conversion to + milliseconds is done before the comparisons. */ + (*nb_no_activity_ms_Q1) += frame_size_ms_Q1; + if (*nb_no_activity_ms_Q1 > NB_SPEECH_FRAMES_BEFORE_DTX*20*2) { - if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)) + if (*nb_no_activity_ms_Q1 <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)*20*2) /* Valid frame for DTX! */ return 1; else - (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX; + (*nb_no_activity_ms_Q1) = NB_SPEECH_FRAMES_BEFORE_DTX*20*2; } } else - (*nb_no_activity_frames) = 0; + (*nb_no_activity_ms_Q1) = 0;
return 0; } @@ -1102,6 +1087,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ int analysis_read_subframe_bak=-1; int is_silence = 0; #endif + opus_int activity = VAD_NO_DECISION; + VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK; @@ -1140,21 +1127,19 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->silk_mode.complexity >= 7 && st->Fs>=16000) #endif { - if (is_digital_silence(pcm, frame_size, st->channels, lsb_depth)) - { - is_silence = 1; - } else { - analysis_read_pos_bak = st->analysis.read_pos; - analysis_read_subframe_bak = st->analysis.read_subframe; - run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, - c1, c2, analysis_channels, st->Fs, - lsb_depth, downmix, &analysis_info); - } + is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth); + analysis_read_pos_bak = st->analysis.read_pos; + analysis_read_subframe_bak = st->analysis.read_subframe; + run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + c1, c2, analysis_channels, st->Fs, + lsb_depth, downmix, &analysis_info);
/* Track the peak signal energy */ if (!is_silence && analysis_info.activity_probability > DTX_ACTIVITY_THRESHOLD) st->peak_signal_energy = MAX32(MULT16_32_Q15(QCONST16(0.999f, 15), st->peak_signal_energy), compute_frame_energy(pcm, frame_size, st->channels, st->arch)); + } else if (st->analysis.initialized) { + tonality_analysis_reset(&st->analysis); } #else (void)analysis_pcm; @@ -1171,6 +1156,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (!is_silence) st->voice_ratio = -1;
+ if (is_silence) + { + activity = !is_silence; + } else if (analysis_info.valid) + { + activity = analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD; + if (!activity) + { + /* Mark as active if this noise frame is sufficiently loud */ + opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch); + activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy); + } + } + st->detected_bandwidth = 0; if (analysis_info.valid) { @@ -1338,6 +1337,14 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage);
+ /* Allow SILK DTX if DTX is enabled but the generalized DTX cannot be used, + e.g. because of the complexity setting or sample rate. */ +#ifndef DISABLE_FLOAT_API + st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence); +#else + st->silk_mode.useDTX = st->use_dtx; +#endif + /* Mode selection depending on application and signal type */ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) { @@ -1386,13 +1393,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) st->mode = MODE_SILK_ONLY; /* When encoding voice and DTX is enabled but the generalized DTX cannot be used, - because of complexity and sampling frequency settings, switch to SILK DTX and - set the encoder to SILK mode */ -#ifndef DISABLE_FLOAT_API - st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence); -#else - st->silk_mode.useDTX = st->use_dtx; -#endif + use SILK in order to make use of its DTX. */ if (st->silk_mode.useDTX && voice_est > 100) st->mode = MODE_SILK_ONLY; #endif @@ -1668,7 +1669,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->mode != MODE_CELT_ONLY) { opus_int32 total_bitRate, celt_rate; - opus_int activity; #ifdef FIXED_POINT const opus_int16 *pcm_silk; #else @@ -1676,14 +1676,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ALLOC(pcm_silk, st->channels*frame_size, opus_int16); #endif
- activity = VAD_NO_DECISION; -#ifndef DISABLE_FLOAT_API - if( analysis_info.valid ) { - /* Inform SILK about the Opus VAD decision */ - activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD ); - } -#endif - /* Distribute bits between SILK and CELT */ total_bitRate = 8 * bytes_target * frame_rate; if( st->mode == MODE_HYBRID ) { @@ -2144,14 +2136,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef DISABLE_FLOAT_API if (st->use_dtx && (analysis_info.valid || is_silence)) { - if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, - st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch)) + if (decide_dtx_mode(activity, &st->nb_no_activity_ms_Q1, 2*1000*frame_size/st->Fs)) { st->rangeFinal = 0; data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); RESTORE_STACK; return 1; } + } else { + st->nb_no_activity_ms_Q1 = 0; } #endif
@@ -2629,7 +2622,6 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) goto bad_arg; } st->variable_duration = value; - celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); } break; case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: @@ -2726,7 +2718,33 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); } break; - + case OPUS_GET_IN_DTX_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + if (st->silk_mode.useDTX && (st->prev_mode == MODE_SILK_ONLY || st->prev_mode == MODE_HYBRID)) { + /* DTX determined by Silk. */ + silk_encoder *silk_enc = (silk_encoder*)(void *)((char*)st+st->silk_enc_offset); + *value = silk_enc->state_Fxx[0].sCmn.noSpeechCounter >= NB_SPEECH_FRAMES_BEFORE_DTX; + /* Stereo: check second channel unless only the middle channel was encoded. */ + if(*value == 1 && st->silk_mode.nChannelsInternal == 2 && silk_enc->prev_decode_only_middle == 0) { + *value = silk_enc->state_Fxx[1].sCmn.noSpeechCounter >= NB_SPEECH_FRAMES_BEFORE_DTX; + } + } +#ifndef DISABLE_FLOAT_API + else if (st->use_dtx) { + /* DTX determined by Opus. 
*/ + *value = st->nb_no_activity_ms_Q1 >= NB_SPEECH_FRAMES_BEFORE_DTX*20*2; + } +#endif + else { + *value = 0; + } + } + break; case CELT_GET_MODE_REQUEST: { const CELTMode ** value = va_arg(ap, const CELTMode**); diff --git a/media/libopus/src/opus_multistream_decoder.c b/media/libopus/src/opus_multistream_decoder.c index 562103cd0a317..a2837c3549505 100644 --- a/media/libopus/src/opus_multistream_decoder.c +++ b/media/libopus/src/opus_multistream_decoder.c @@ -251,8 +251,11 @@ int opus_multistream_decode_native( } packet_offset = 0; ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip); - data += packet_offset; - len -= packet_offset; + if (!do_plc) + { + data += packet_offset; + len -= packet_offset; + } if (ret <= 0) { RESTORE_STACK; @@ -487,7 +490,7 @@ int opus_multistream_decoder_ctl_va_list(OpusMSDecoder *st, int request, OpusDecoder **value; stream_id = va_arg(ap, opus_int32); if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; + goto bad_arg; value = va_arg(ap, OpusDecoder**); if (!value) { diff --git a/media/libopus/src/opus_multistream_encoder.c b/media/libopus/src/opus_multistream_encoder.c index 9cb9bf3458f7d..213e3eb2c2265 100644 --- a/media/libopus/src/opus_multistream_encoder.c +++ b/media/libopus/src/opus_multistream_encoder.c @@ -443,7 +443,8 @@ static int opus_multistream_encoder_init_impl( char *ptr;
if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams) || + (streams+coupled_streams>channels)) return OPUS_BAD_ARG;
st->arch = opus_select_arch(); @@ -459,8 +460,7 @@ static int opus_multistream_encoder_init_impl( st->layout.mapping[i] = mapping[i]; if (!validate_layout(&st->layout)) return OPUS_BAD_ARG; - if (mapping_type == MAPPING_TYPE_SURROUND && - !validate_encoder_layout(&st->layout)) + if (!validate_encoder_layout(&st->layout)) return OPUS_BAD_ARG; if (mapping_type == MAPPING_TYPE_AMBISONICS && !validate_ambisonics(st->layout.nb_channels, NULL, NULL)) @@ -595,7 +595,8 @@ OpusMSEncoder *opus_multistream_encoder_create( int ret; OpusMSEncoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams) || + (streams+coupled_streams>channels)) { if (error) *error = OPUS_BAD_ARG; @@ -1249,7 +1250,7 @@ int opus_multistream_encoder_ctl_va_list(OpusMSEncoder *st, int request, OpusEncoder **value; stream_id = va_arg(ap, opus_int32); if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; + goto bad_arg; value = va_arg(ap, OpusEncoder**); if (!value) { diff --git a/media/libopus/src/opus_private.h b/media/libopus/src/opus_private.h index 09783ceefcab7..5e2463f546fe7 100644 --- a/media/libopus/src/opus_private.h +++ b/media/libopus/src/opus_private.h @@ -135,6 +135,7 @@ typedef void (*opus_copy_channel_out_func)( typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); +int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth);
int encode_size(int size, unsigned char *data);