Commit 83ff3a7f authored by mayong's avatar mayong
Browse files

Add cpp_onnxruntime

parent 5f46ad1c
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Energy().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Computes the scaled energy (sum of squared samples) of |vector|.
//
// Input:
//      - vector        : Samples to accumulate.
//      - vector_length : Number of samples in |vector|.
//
// Output:
//      - scale_factor  : Right-shift count that was applied to every squared
//                        sample before accumulation, as returned by
//                        WebRtcSpl_GetScalingSquare().
//
// Return value         : Sum over all samples of (sample^2 >> *scale_factor).
int32_t WebRtcSpl_Energy(int16_t* vector,
                         size_t vector_length,
                         int* scale_factor)
{
    // The shift is chosen for |vector_length| accumulated terms.
    const int shift =
        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t energy = 0;
    size_t k = 0;

    while (k < vector_length)
    {
        const int16_t sample = vector[k];
        // The int16_t operands are promoted to int before the multiply.
        energy += (sample * sample) >> shift;
        ++k;
    }

    *scale_factor = shift;
    return energy;
}
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetScalingSquare().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Returns the number of right shifts to apply to each squared sample of
// |in_vector| so that |times| such squares can be accumulated.
//
// Input:
//      - in_vector        : Samples to inspect.
//      - in_vector_length : Number of samples in |in_vector|.
//      - times            : Number of squared terms that will be summed.
//
// Return value            : Shift count (>= 0); 0 when every sample is zero.
//
// NOTE(review): relies on WebRtcSpl_GetSizeInBits() and WebRtcSpl_NormW32()
// from spl_inl.h; their exact semantics are not visible in this file.
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
    // Magnitudes are tracked in 32 bits: |-32768| == 32768 does not fit in
    // int16_t, and narrowing it would make the largest possible sample look
    // negative so that it is ignored when searching for the maximum.
    int32_t smax = -1;
    int16_t t;
    size_t i;

    for (i = 0; i < in_vector_length; i++)
    {
        int32_t sample = in_vector[i];
        int32_t sabs = (sample > 0) ? sample : -sample;
        if (sabs > smax)
        {
            smax = sabs;
        }
    }

    // smax <= 32768, so smax * smax <= 2^30 and fits in int32_t.
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
    if (smax == 0)
    {
        return 0;  // Since norm(0) returns 0
    }
    return (t > nbits) ? 0 : nbits - t;
}
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#include <stdint.h>
// For ComplexFFT(), the maximum fft order is 10;
// WebRTC APM uses orders of only 7 and 8.
enum { kMaxFFTOrder = 10 };
struct RealFFT;
#ifdef __cplusplus
extern "C" {
#endif
struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
// Compute an FFT for a real-valued signal of length 2^order,
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
// specification structure, which must be initialized prior to calling the FFT
// function with WebRtcSpl_CreateRealFFT().
// The relationship between the input and output sequences can
// be expressed in terms of the DFT, i.e.:
// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
// n=0,1,2,...N-1
// N=2^order.
// The conjugate-symmetric output sequence is represented using a CCS vector,
// which is of length N+2, and is organized as follows:
// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
// where R[n] and I[n], respectively, denote the real and imaginary components
// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
// the foldover frequency.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// real_data_in - the input signal. For an ARM Neon platform, it must be
// aligned on a 32-byte boundary.
//
// Output Arguments:
// complex_data_out - the output complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be different
// from real_data_in, and aligned on a 32-byte boundary.
//
// Return Value:
// 0 - FFT calculation is successful.
// -1 - Error with bad arguments (null pointers).
int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
const int16_t* real_data_in,
int16_t* complex_data_out);
// Compute the inverse FFT for a conjugate-symmetric input sequence of length
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
// the specification structure, which must be initialized prior to calling the
// FFT function with WebRtcSpl_CreateRealFFT().
// For a transform of length M, the input sequence is represented using a packed
// CCS vector of length M+2, which is explained in the comments for
// WebRtcSpl_RealForwardFFT above.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// complex_data_in - the input complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be aligned on
// a 32-byte boundary.
//
// Output Arguments:
// real_data_out - the output real signal. For an ARM Neon platform, it must
// be different to complex_data_in, and aligned on a 32-byte
// boundary.
//
// Return Value:
// 0 or a positive number - a value that the elements in the |real_data_out|
// should be shifted left with in order to get
// correct physical values.
// -1 - Error with bad arguments (null pointers).
int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
const int16_t* complex_data_in,
int16_t* real_data_out);
#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes all of the fixed-point signal processing library
* (SPL) function descriptions and declarations. For specific function calls,
* see bottom of file.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
#include <string.h>
#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
// Macros specific for the fixed point implementation
// Limits of the 16- and 32-bit signed word types. The negative and cast
// values are parenthesized so they behave as a single operand in any
// expression.
#define WEBRTC_SPL_WORD16_MAX 32767
#define WEBRTC_SPL_WORD16_MIN (-32768)
#define WEBRTC_SPL_WORD32_MAX ((int32_t)0x7fffffff)
#define WEBRTC_SPL_WORD32_MIN ((int32_t)0x80000000)
#define WEBRTC_SPL_MAX_LPC_ORDER 14
// Every argument is parenthesized so that operator precedence inside an
// argument expression cannot change the comparison. The selected argument is
// evaluated twice, so do not pass expressions with side effects.
#define WEBRTC_SPL_MIN(A, B) ((A) < (B) ? (A) : (B))  // Get min value
#define WEBRTC_SPL_MAX(A, B) ((A) > (B) ? (A) : (B))  // Get max value
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
// Absolute value of |a| after converting the whole argument expression to
// int16_t / int32_t. The argument is evaluated more than once.
#define WEBRTC_SPL_ABS_W16(a) \
  (((int16_t)(a) >= 0) ? ((int16_t)(a)) : -((int16_t)(a)))
#define WEBRTC_SPL_ABS_W32(a) \
  (((int32_t)(a) >= 0) ? ((int32_t)(a)) : -((int32_t)(a)))
// Products with both operands cast to the stated width before multiplying.
#define WEBRTC_SPL_MUL(a, b) ((int32_t)((int32_t)(a) * (int32_t)(b)))
#define WEBRTC_SPL_UMUL(a, b) ((uint32_t)((uint32_t)(a) * (uint32_t)(b)))
#define WEBRTC_SPL_UMUL_32_16(a, b) ((uint32_t)((uint32_t)(a) * (uint16_t)(b)))
// Signed 16-bit times unsigned 16-bit; the expansion is wrapped in parentheses
// so it can be used as an operand of a larger expression.
#define WEBRTC_SPL_MUL_16_U16(a, b) ((int32_t)(int16_t)(a) * (uint16_t)(b))
// clang-format off
// clang-format would choose some indentation
// leading to presubmit error (cpplint.py)
#ifndef WEBRTC_ARCH_ARM_V7
// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
#ifndef MIPS32_LE
// For MIPS platforms, these are inline functions in spl_inl_mips.h
// 16 x 16 -> 32-bit signed product.
#define WEBRTC_SPL_MUL_16_16(a, b) ((int32_t)(((int16_t)(a)) * ((int16_t)(b))))
// (a * b) >> 16 for a 16-bit |a| and a 32-bit |b|, built from the high and
// low halves of |b|. |b| is parenthesized so that an expression argument is
// shifted/masked as a whole.
#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
  (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) + \
   ((WEBRTC_SPL_MUL_16_16(a, ((b) & 0xffff) >> 1) + 0x4000) >> 15))
#endif
#endif
// Same construction with a net right shift of 11, 14 and 15 bits.
#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
  (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 5) + \
   (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10))
#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \
  (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 2) + \
   (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13))
#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \
  ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 1)) + \
   (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14))
// clang-format on
// 16 x 16 product shifted right by |c| bits, without and with rounding.
#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) (WEBRTC_SPL_MUL_16_16(a, b) >> (c))
#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \
  ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t)(((int32_t)1) << ((c)-1)))) >> (c))
// C + the 32 most significant bits of A * B
// (arguments are parenthesized so expression arguments are used as a whole).
#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \
  ((C) + ((B) >> 16) * (A) + (((uint32_t)((B) & 0x0000FFFF) * (A)) >> 16))
// Saturates |b| to the range [c, a]: yields |a| when b > a, |c| when b < c,
// otherwise |b|. Arguments may be evaluated more than once.
#define WEBRTC_SPL_SAT(a, b, c) ((b) > (a) ? (a) : (b) < (c) ? (c) : (b))
// Shifting with negative numbers allowed
// Positive means left shift
#define WEBRTC_SPL_SHIFT_W32(x, c) ((c) >= 0 ? (x) * (1 << (c)) : (x) >> -(c))
// Shifting with negative numbers not allowed
// We cannot do casting here due to signed/unsigned problem
#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c))
#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c))
// Multiplies the int16_t value of |a| by 18816, shifts right by 7 and keeps
// the low 15 bits. |a| is parenthesized so the cast applies to the whole
// argument expression.
#define WEBRTC_SPL_RAND(a) \
  ((int16_t)((((int16_t)(a) * 18816) >> 7) & 0x00007fff))
#ifdef __cplusplus
extern "C" {
#endif
// Copies |length| int16_t elements from |v2| to |v1| with memcpy(); as with
// any memcpy() call, the source and destination ranges must not overlap.
#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \
memcpy(v1, v2, (length) * sizeof(int16_t))
// inline functions:
#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
// third party math functions
#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
// Initialize SPL. Currently it contains only function pointer initialization.
// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined),
// the pointers will be assigned to code optimized for Neon; otherwise, generic
// C code will be assigned.
// Note that this function MUST be called in any application that uses SPL
// functions.
void WebRtcSpl_Init(void);
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
size_t in_vector_length,
size_t times);
// Copy and set operations. Implementation in copy_set_operations.c.
// Descriptions at bottom of file.
void WebRtcSpl_MemSetW16(int16_t* vector,
int16_t set_value,
size_t vector_length);
void WebRtcSpl_MemSetW32(int32_t* vector,
int32_t set_value,
size_t vector_length);
void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector,
int16_t* in_vector,
size_t vector_length);
void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector,
size_t in_vector_length,
size_t samples,
int16_t* out_vector);
void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t vector_length);
void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t vector_length);
// End: Copy and set operations.
// Minimum and maximum operation functions and their pointers.
// Implementation in min_max_operations.c.
// Returns the largest absolute value in a signed 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length);
extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length);
#endif
// Returns the largest absolute value in a signed 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length);
extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length);
#endif
#if defined(MIPS_DSP_R1_LE)
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length);
#endif
// Returns the maximum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length);
extern MaxValueW16 WebRtcSpl_MaxValueW16;
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length);
#endif
// Returns the maximum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length);
extern MaxValueW32 WebRtcSpl_MaxValueW32;
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length);
#endif
#if defined(MIPS32_LE)
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length);
#endif
// Returns the minimum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length);
extern MinValueW16 WebRtcSpl_MinValueW16;
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length);
#endif
// Returns the minimum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length);
extern MinValueW32 WebRtcSpl_MinValueW32;
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length);
#endif
#if defined(MIPS32_LE)
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
#endif
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum absolute value in vector.
// If there are multiple equal maxima, return the index of the
// first. -32768 will always have precedence over 32767 (despite
// -32768 presenting an int16 absolute value of 32767).
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length);
// Returns the vector index to the maximum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector (if multiple
// indexes have the maximum, return the first).
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length);
// Returns the vector index to the maximum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector (if multiple
// indexes have the maximum, return the first).
size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length);
// Returns the vector index to the minimum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector (if multiple
// indexes have the minimum, return the first).
size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length);
// Returns the vector index to the minimum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector (if multiple
// indexes have the minimum, return the first).
size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length);
// End: Minimum and maximum operations.
// Vector scaling operations. Implementation in vector_scaling_operations.c.
// Description at bottom of file.
void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector,
size_t vector_length,
const int16_t* in_vector,
int16_t right_shifts);
void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector,
size_t vector_length,
const int32_t* in_vector,
int16_t right_shifts);
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector,
size_t vector_length,
const int32_t* in_vector,
int right_shifts);
void WebRtcSpl_ScaleVector(const int16_t* in_vector,
int16_t* out_vector,
int16_t gain,
size_t vector_length,
int16_t right_shifts);
void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector,
int16_t* out_vector,
int16_t gain,
size_t vector_length,
int16_t right_shifts);
void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1,
int16_t gain1,
int right_shifts1,
const int16_t* in_vector2,
int16_t gain2,
int right_shifts2,
int16_t* out_vector,
size_t vector_length);
// The functions (with related pointer) perform the vector operation:
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
// + round_value) >> right_shifts,
// where round_value = (1 << right_shifts) >> 1.
//
// Input:
// - in_vector1 : Input vector 1
// - in_vector1_scale : Gain to be used for vector 1
// - in_vector2 : Input vector 2
// - in_vector2_scale : Gain to be used for vector 2
// - right_shifts : Number of right bit shifts to be applied
// - length : Number of elements in the input vectors
//
// Output:
// - out_vector : Output vector
// Return value : 0 if OK, -1 if (in_vector1 == null
// || in_vector2 == null || out_vector == null
// || length <= 0 || right_shift < 0).
typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
size_t length);
extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
size_t length);
#if defined(MIPS_DSP_R1_LE)
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
size_t length);
#endif
// End: Vector scaling operations.
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
// Description at bottom of file.
void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector,
const int16_t* in_vector,
const int16_t* window,
size_t vector_length,
int16_t right_shifts);
void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector,
const int16_t* in_vector,
const int16_t* window,
size_t vector_length,
int16_t right_shifts);
void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector,
const int16_t* in_vector1,
const int16_t* in_vector2,
size_t vector_length,
int16_t right_shifts);
void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector,
int16_t* in_vector,
int16_t gain,
int32_t add_constant,
int16_t right_shifts,
size_t vector_length);
void WebRtcSpl_AffineTransformVector(int16_t* out_vector,
int16_t* in_vector,
int16_t gain,
int32_t add_constant,
int16_t right_shifts,
size_t vector_length);
// End: iLBC specific functions.
// Signal processing operations.
// A 32-bit fix-point implementation of auto-correlation computation
//
// Input:
// - in_vector : Vector to calculate autocorrelation upon
// - in_vector_length : Length (in samples) of |vector|
// - order : The order up to which the autocorrelation should be
// calculated
//
// Output:
// - result : auto-correlation values (values should be seen
// relative to each other since the absolute values
// might have been down shifted to avoid overflow)
//
// - scale : The number of left shifts required to obtain the
// auto-correlation in Q0
//
// Return value : Number of samples in |result|, i.e. (order+1)
size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
size_t in_vector_length,
size_t order,
int32_t* result,
int* scale);
// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
// does NOT use the 64 bit class
//
// Input:
// - auto_corr : Vector with autocorrelation values of length >= |order|+1
// - order : The LPC filter order (support up to order 20)
//
// Output:
// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12
// - refl_coef : refl_coef[0..order-1] Reflection coefficients in Q15
//
// Return value : 1 for stable, 0 for unstable
int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr,
int16_t* lpc_coef,
int16_t* refl_coef,
size_t order);
// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
// This version is a 16 bit operation.
//
// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
// "rare" cases even if the reflection coefficients are stable.
//
// Input:
// - refl_coef : Reflection coefficients in Q15 that should be converted
// to LPC coefficients
// - use_order : Number of coefficients in |refl_coef|
//
// Output:
// - lpc_coef : LPC coefficients in Q12
void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef,
int use_order,
int16_t* lpc_coef);
// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
// This version is a 16 bit operation.
// The conversion is implemented by the step-down algorithm.
//
// Input:
// - lpc_coef : LPC coefficients in Q12, that should be converted to
// reflection coefficients
// - use_order : Number of coefficients in |lpc_coef|
//
// Output:
// - refl_coef : Reflection coefficients in Q15.
void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef,
int use_order,
int16_t* refl_coef);
// Calculates reflection coefficients (16 bit) from auto-correlation values
//
// Input:
// - auto_corr : Auto-correlation values
// - use_order : Number of coefficients to be calculated
//
// Output:
// - refl_coef : Reflection coefficients in Q15.
void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr,
int use_order,
int16_t* refl_coef);
// The functions (with related pointer) calculate the cross-correlation between
// two sequences |seq1| and |seq2|.
// |seq1| is fixed and |seq2| slides as the pointer is increased with the
// amount |step_seq2|. Note the arguments should obey the relationship:
// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
// buffer size of |seq2|
//
// Input:
// - seq1 : First sequence (fixed throughout the correlation)
// - seq2 : Second sequence (slides |step_seq2| for each
// new correlation)
// - dim_seq : Number of samples to use in the cross-correlation
// - dim_cross_correlation : Number of cross-correlations to calculate (the
// start position for |seq2| is updated for each
// new one)
// - right_shifts : Number of right bit shifts to use. This will
// become the output Q-domain.
// - step_seq2 : How many (positive or negative) steps the
// |seq2| pointer should be updated for each new
// cross-correlation value.
//
// Output:
// - cross_correlation : The cross-correlation in Q(-right_shifts)
typedef void (*CrossCorrelation)(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
size_t dim_seq,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2);
extern CrossCorrelation WebRtcSpl_CrossCorrelation;
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
size_t dim_seq,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2);
#if defined(WEBRTC_HAS_NEON)
void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
size_t dim_seq,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2);
#endif
#if defined(MIPS32_LE)
void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
size_t dim_seq,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2);
#endif
// Creates (the first half of) a Hanning window. Size must be at least 1 and
// at most 512.
//
// Input:
// - size : Length of the requested Hanning window (1 to 512)
//
// Output:
// - window : Hanning vector in Q14.
void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size);
// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
// |in_vector|. Input and output values are in Q15.
//
// Inputs:
// - in_vector : Values to calculate sqrt(1 - x^2) of
// - vector_length : Length of vector |in_vector|
//
// Output:
// - out_vector : Output values in Q15
void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector,
size_t vector_length,
int16_t* out_vector);
// End: Signal processing operations.
// Randomization functions. Implementations collected in
// randomization_functions.c and descriptions at bottom of this file.
int16_t WebRtcSpl_RandU(uint32_t* seed);
int16_t WebRtcSpl_RandN(uint32_t* seed);
int16_t WebRtcSpl_RandUArray(int16_t* vector,
int16_t vector_length,
uint32_t* seed);
// End: Randomization functions.
// Math functions
int32_t WebRtcSpl_Sqrt(int32_t value);
// Divisions. Implementations collected in division_operations.c and
// descriptions at bottom of this file.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den);
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den);
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den);
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den);
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low);
// End: Divisions.
int32_t WebRtcSpl_Energy(int16_t* vector,
size_t vector_length,
int* scale_factor);
// Filter operations.
size_t WebRtcSpl_FilterAR(const int16_t* ar_coef,
size_t ar_coef_length,
const int16_t* in_vector,
size_t in_vector_length,
int16_t* filter_state,
size_t filter_state_length,
int16_t* filter_state_low,
size_t filter_state_low_length,
int16_t* out_vector,
int16_t* out_vector_low,
size_t out_vector_low_length);
// WebRtcSpl_FilterMAFastQ12(...)
//
// Performs MA filtering on a vector in Q12
//
// Input:
// - in_vector : Input samples (state in positions
// in_vector[-order] .. in_vector[-1])
// - ma_coef : Filter coefficients (in Q12)
// - ma_coef_length : Number of B coefficients (order+1)
// - vector_length : Number of samples to be filtered
//
// Output:
// - out_vector : Filtered samples
//
void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector,
int16_t* out_vector,
const int16_t* ma_coef,
size_t ma_coef_length,
size_t vector_length);
// Performs AR filtering on a vector in Q12
// Input:
// - data_in : Input samples
// - data_out : State information in positions
// data_out[-order] .. data_out[-1]
// - coefficients : Filter coefficients (in Q12)
// - coefficients_length: Number of coefficients (order+1)
// - data_length : Number of samples to be filtered
// Output:
// - data_out : Filtered samples
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
int16_t* data_out,
const int16_t* __restrict coefficients,
size_t coefficients_length,
size_t data_length);
// The functions (with related pointer) perform a MA down sampling filter
// on a vector.
// Input:
// - data_in : Input samples (state in positions
// data_in[-order] .. data_in[-1])
// - data_in_length : Number of samples in |data_in| to be filtered.
// This must be at least
// |delay| + |factor| * (|data_out_length| - 1) + 1
// - data_out_length : Number of down sampled samples desired
// - coefficients : Filter coefficients (in Q12)
// - coefficients_length: Number of coefficients (order+1)
// - factor : Decimation factor
// - delay : Delay of filter (compensated for in |data_out|)
// Output:
// - data_out : Filtered samples
// Return value : 0 if OK, -1 if |data_in| is too short
typedef int (*DownsampleFast)(const int16_t* data_in,
size_t data_in_length,
int16_t* data_out,
size_t data_out_length,
const int16_t* __restrict coefficients,
size_t coefficients_length,
int factor,
size_t delay);
extern DownsampleFast WebRtcSpl_DownsampleFast;
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
size_t data_in_length,
int16_t* data_out,
size_t data_out_length,
const int16_t* __restrict coefficients,
size_t coefficients_length,
int factor,
size_t delay);
#if defined(WEBRTC_HAS_NEON)
int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
size_t data_in_length,
int16_t* data_out,
size_t data_out_length,
const int16_t* __restrict coefficients,
size_t coefficients_length,
int factor,
size_t delay);
#endif
#if defined(MIPS32_LE)
int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
size_t data_in_length,
int16_t* data_out,
size_t data_out_length,
const int16_t* __restrict coefficients,
size_t coefficients_length,
int factor,
size_t delay);
#endif
// End: Filter operations.
// FFT operations
int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
// values, and swap elements whose indexes are bit-reverses of each other.
//
// Input:
// - complex_data : Complex data buffer containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary
// elements: [Re Im Re Im Re Im....]
// - stages : Number of FFT stages. Must be at least 3 and at most
// 10, since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// Output:
// - complex_data : The complex data buffer.
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages);
// End: FFT operations
/************************************************************
*
* RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW
*
************************************************************/
/*******************************************************************
* resample.c
*
* Includes the following resampling combinations
* 22 kHz -> 16 kHz
* 16 kHz -> 22 kHz
* 22 kHz -> 8 kHz
* 8 kHz -> 22 kHz
*
******************************************************************/
// state structure for 22 -> 16 resampler
// Three int32_t state arrays of 8 elements each.
// NOTE(review): the member names suggest one array per internal conversion
// stage (22->44, 44->32, 32->16); this is inferred from naming only -- confirm
// against resample.c.
typedef struct {
int32_t S_22_44[8];
int32_t S_44_32[8];
int32_t S_32_16[8];
} WebRtcSpl_State22khzTo16khz;
void WebRtcSpl_Resample22khzTo16khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State22khzTo16khz* state,
int32_t* tmpmem);
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state);
// state structure for 16 -> 22 resampler
// Two int32_t state arrays of 8 elements each.
// NOTE(review): the member names suggest one array per internal conversion
// stage (16->32, 32->22); inferred from naming only -- confirm against
// resample.c.
typedef struct {
int32_t S_16_32[8];
int32_t S_32_22[8];
} WebRtcSpl_State16khzTo22khz;
void WebRtcSpl_Resample16khzTo22khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State16khzTo22khz* state,
int32_t* tmpmem);
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state);
// state structure for 22 -> 8 resampler
// One 16-element and two 8-element int32_t state arrays.
// NOTE(review): the member names suggest one array per internal stage
// (22->22, 22->16, 16->8); inferred from naming only -- confirm against
// resample.c.
typedef struct {
int32_t S_22_22[16];
int32_t S_22_16[8];
int32_t S_16_8[8];
} WebRtcSpl_State22khzTo8khz;
void WebRtcSpl_Resample22khzTo8khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State22khzTo8khz* state,
int32_t* tmpmem);
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state);
// State for the 8 kHz -> 22 kHz resampler. A pointer to an instance is taken
// by WebRtcSpl_Resample8khzTo22khz() and WebRtcSpl_ResetResample8khzTo22khz().
// NOTE(review): the member names presumably identify the conversion stage
// each buffer belongs to (8 -> 16, 16 -> 11 and 11 -> 22 kHz); confirm against
// resample.c.
typedef struct {
int32_t S_8_16[8];
int32_t S_16_11[8];
int32_t S_11_22[8];
} WebRtcSpl_State8khzTo22khz;
// 8 kHz -> 22 kHz conversion of |in| into |out| using |state|.
// NOTE(review): |tmpmem| is presumably scratch memory; buffer sizes are not
// visible in this header -- confirm in resample.c.
void WebRtcSpl_Resample8khzTo22khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State8khzTo22khz* state,
int32_t* tmpmem);
// Takes the state to reset. Presumably has to be called before the first
// WebRtcSpl_Resample8khzTo22khz() call -- confirm in resample.c.
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state);
/*******************************************************************
* resample_fractional.c
* Functions for internal use in the other resample functions
*
* Includes the following resampling combinations
* 48 kHz -> 32 kHz
* 32 kHz -> 24 kHz
* 44 kHz -> 32 kHz
*
******************************************************************/
// Fractional-ratio resamplers operating on int32_t data. They take no state
// argument, unlike the int16_t resamplers declared in this header.
// NOTE(review): |K| presumably counts blocks of samples rather than single
// samples, and |In| may need to include history samples before the first
// block -- confirm both against resample_fractional.c before calling.
void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K);
void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
/*******************************************************************
* resample_48khz.c
*
* Includes the following resampling combinations
* 48 kHz -> 16 kHz
* 16 kHz -> 48 kHz
* 48 kHz -> 8 kHz
* 8 kHz -> 48 kHz
*
******************************************************************/
// State for the 48 kHz -> 16 kHz resampler. A pointer to an instance is taken
// by WebRtcSpl_Resample48khzTo16khz() and
// WebRtcSpl_ResetResample48khzTo16khz().
// S_48_48 is the filter state handed to the first (48 -> 48 kHz low-pass)
// stage of WebRtcSpl_Resample48khzTo16khz().
// NOTE(review): S_48_32 and S_32_16 presumably belong to the following
// 48 -> 32 and 32 -> 16 kHz stages; confirm against resample_48khz.c.
typedef struct {
int32_t S_48_48[16];
int32_t S_48_32[8];
int32_t S_32_16[8];
} WebRtcSpl_State48khzTo16khz;
// 48 kHz -> 16 kHz conversion of |in| into |out| using |state|. The
// implementation reads 480 samples from |in| per call and stores intermediate
// int32_t data in |tmpmem| (the first stage writes starting at tmpmem + 16).
// NOTE(review): the total size required for |tmpmem| and the number of samples
// written to |out| are not visible here -- confirm in resample_48khz.c.
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State48khzTo16khz* state,
int32_t* tmpmem);
// Takes the state to reset. Presumably has to be called before the first
// WebRtcSpl_Resample48khzTo16khz() call -- confirm in resample_48khz.c.
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state);
// State for the 16 kHz -> 48 kHz resampler. A pointer to an instance is taken
// by WebRtcSpl_Resample16khzTo48khz() and
// WebRtcSpl_ResetResample16khzTo48khz().
// NOTE(review): the member names presumably identify the conversion stage
// each buffer belongs to (16 -> 32, 32 -> 24 and 24 -> 48 kHz); confirm
// against resample_48khz.c.
typedef struct {
int32_t S_16_32[8];
int32_t S_32_24[8];
int32_t S_24_48[8];
} WebRtcSpl_State16khzTo48khz;
// 16 kHz -> 48 kHz conversion of |in| into |out| using |state|.
// NOTE(review): |tmpmem| is presumably scratch memory; buffer sizes are not
// visible in this header -- confirm in resample_48khz.c.
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State16khzTo48khz* state,
int32_t* tmpmem);
// Takes the state to reset. Presumably has to be called before the first
// WebRtcSpl_Resample16khzTo48khz() call -- confirm in resample_48khz.c.
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state);
// State for the 48 kHz -> 8 kHz resampler. A pointer to an instance is taken
// by WebRtcSpl_Resample48khzTo8khz() and WebRtcSpl_ResetResample48khzTo8khz().
// Note that S_24_24 holds 16 words while the other members hold 8.
// NOTE(review): the member names presumably identify the conversion stage
// each buffer belongs to; confirm against resample_48khz.c.
typedef struct {
int32_t S_48_24[8];
int32_t S_24_24[16];
int32_t S_24_16[8];
int32_t S_16_8[8];
} WebRtcSpl_State48khzTo8khz;
// 48 kHz -> 8 kHz conversion of |in| into |out| using |state|.
// NOTE(review): |tmpmem| is presumably scratch memory; buffer sizes are not
// visible in this header -- confirm in resample_48khz.c.
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State48khzTo8khz* state,
int32_t* tmpmem);
// Takes the state to reset. Presumably has to be called before the first
// WebRtcSpl_Resample48khzTo8khz() call -- confirm in resample_48khz.c.
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state);
// State for the 8 kHz -> 48 kHz resampler. A pointer to an instance is taken
// by WebRtcSpl_Resample8khzTo48khz() and WebRtcSpl_ResetResample8khzTo48khz().
// NOTE(review): the member names presumably identify the conversion stage
// each buffer belongs to (8 -> 16, 16 -> 12, 12 -> 24 and 24 -> 48 kHz);
// confirm against resample_48khz.c.
typedef struct {
int32_t S_8_16[8];
int32_t S_16_12[8];
int32_t S_12_24[8];
int32_t S_24_48[8];
} WebRtcSpl_State8khzTo48khz;
// 8 kHz -> 48 kHz conversion of |in| into |out| using |state|.
// NOTE(review): |tmpmem| is presumably scratch memory; buffer sizes are not
// visible in this header -- confirm in resample_48khz.c.
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State8khzTo48khz* state,
int32_t* tmpmem);
// Takes the state to reset. Presumably has to be called before the first
// WebRtcSpl_Resample8khzTo48khz() call -- confirm in resample_48khz.c.
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state);
/*******************************************************************
* resample_by_2.c
*
* Includes down and up sampling by a factor of two.
*
******************************************************************/
// Down-samples |in| by a factor of two into |out|.
// NOTE(review): |len| is presumably the number of samples in |in| (so |out|
// would receive len / 2 samples), and |filtState| presumably carries filter
// memory between calls; its required length is not visible in this header --
// confirm in resample_by_2.c.
void WebRtcSpl_DownsampleBy2(const int16_t* in,
size_t len,
int16_t* out,
int32_t* filtState);
// Up-samples |in| by a factor of two into |out|.
// NOTE(review): |len| is presumably the number of samples in |in| (so |out|
// would receive 2 * len samples), and |filtState| presumably carries filter
// memory between calls; its required length is not visible in this header --
// confirm in resample_by_2.c.
void WebRtcSpl_UpsampleBy2(const int16_t* in,
size_t len,
int16_t* out,
int32_t* filtState);
/************************************************************
* END OF RESAMPLING FUNCTIONS
************************************************************/
// Splits |in_data| into a lower and an upper sub band. Full parameter
// documentation is in the "WebRtcSpl_AnalysisQMF(...)" comment section
// further down in this file; |filter_state1| and |filter_state2| are both
// read and updated.
void WebRtcSpl_AnalysisQMF(const int16_t* in_data,
size_t in_data_length,
int16_t* low_band,
int16_t* high_band,
int32_t* filter_state1,
int32_t* filter_state2);
// Combines |low_band| and |high_band| back into one signal in |out_data|.
// Full parameter documentation is in the "WebRtcSpl_SynthesisQMF(...)" comment
// section further down in this file; |filter_state1| and |filter_state2| are
// both read and updated.
void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
const int16_t* high_band,
size_t band_length,
int16_t* out_data,
int32_t* filter_state1,
int32_t* filter_state2);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
//
// WebRtcSpl_AddSatW16(...)
// WebRtcSpl_AddSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, addition of
// the numbers specified by the |var1| and |var2| parameters.
//
// Input:
// - var1 : Input variable 1
// - var2 : Input variable 2
//
// Return value : Added and saturated value
//
//
// WebRtcSpl_SubSatW16(...)
// WebRtcSpl_SubSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
// of the numbers specified by the |var1| and |var2| parameters.
//
// Input:
// - var1 : Input variable 1
// - var2 : Input variable 2
//
// Returned value : Subtracted and saturated value
//
//
// WebRtcSpl_GetSizeInBits(...)
//
// Returns the # of bits that are needed at the most to represent the number
// specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bits needed to represent |value|
//
//
// WebRtcSpl_NormW32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the 32-bit
// signed number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
//
//
// WebRtcSpl_NormW16(...)
//
// Norm returns the # of left shifts required to 16-bit normalize the 16-bit
// signed number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 16-bit normalize |value|
//
//
// WebRtcSpl_NormU32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the unsigned
// 32-bit number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
//
//
// WebRtcSpl_GetScalingSquare(...)
//
// Returns the # of bits required to scale the samples specified in the
// |in_vector| parameter so that, if the squares of the samples are added the
// # of times specified by the |times| parameter, the 32-bit addition will not
// overflow (result in int32_t).
//
// Input:
// - in_vector : Input vector to check scaling on
// - in_vector_length : Samples in |in_vector|
// - times : Number of additions to be performed
//
// Return value : Number of right bit shifts needed to avoid
// overflow in the addition calculation
//
//
// WebRtcSpl_MemSetW16(...)
//
// Sets all the values in the int16_t vector |vector| of length
// |vector_length| to the specified value |set_value|
//
// Input:
// - vector : Pointer to the int16_t vector
// - set_value : Value specified
// - vector_length : Length of vector
//
//
// WebRtcSpl_MemSetW32(...)
//
// Sets all the values in the int32_t vector |vector| of length
// |vector_length| to the specified value |set_value|
//
// Input:
// - vector : Pointer to the int32_t vector
// - set_value : Value specified
// - vector_length : Length of vector
//
//
// WebRtcSpl_MemCpyReversedOrder(...)
//
// Copies all the values from the source int16_t vector |in_vector| to a
// destination int16_t vector |out_vector|. It is done in reversed order,
// meaning that the first sample of |in_vector| is copied to the last sample of
// the |out_vector|. The procedure continues until the last sample of
// |in_vector| has been copied to the first sample of |out_vector|. This
// creates a reversed vector. Used in e.g. prediction in iLBC.
//
// Input:
// - in_vector : Pointer to the first sample in a int16_t vector
// of length |length|
// - vector_length : Number of elements to copy
//
// Output:
// - out_vector : Pointer to the last sample in a int16_t vector
// of length |length|
//
//
// WebRtcSpl_CopyFromEndW16(...)
//
// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
// to the vector |out_vector|.
//
// Input:
// - in_vector : Input vector
// - in_vector_length : Number of samples in |in_vector|
// - samples : Number of samples to extract (from right side)
// from |in_vector|
//
// Output:
// - out_vector : Vector with the requested samples
//
//
// WebRtcSpl_ZerosArrayW16(...)
// WebRtcSpl_ZerosArrayW32(...)
//
// Inserts the value "zero" in all positions of a w16 and a w32 vector
// respectively.
//
// Input:
// - vector_length : Number of samples in vector
//
// Output:
// - vector : Vector containing all zeros
//
//
// WebRtcSpl_VectorBitShiftW16(...)
// WebRtcSpl_VectorBitShiftW32(...)
//
// Bit shifts all the values in a vector up or downwards. Different calls for
// int16_t and int32_t vectors respectively.
//
// Input:
// - vector_length : Length of vector
// - in_vector : Pointer to the vector that should be bit shifted
// - right_shifts : Number of right bit shifts (negative value gives left
// shifts)
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
//
//
// WebRtcSpl_VectorBitShiftW32ToW16(...)
//
// Bit shifts all the values in a int32_t vector up or downwards and
// stores the result as an int16_t vector. The function will saturate the
// signal if needed, before storing in the output vector.
//
// Input:
// - vector_length : Length of vector
// - in_vector : Pointer to the vector that should be bit shifted
// - right_shifts : Number of right bit shifts (negative value gives left
// shifts)
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
//
//
// WebRtcSpl_ScaleVector(...)
//
// Performs the vector operation:
// out_vector[k] = (gain*in_vector[k])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
//
//
// WebRtcSpl_ScaleVectorWithSat(...)
//
// Performs the vector operation:
// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts )
//
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
//
//
// WebRtcSpl_ScaleAndAddVectors(...)
//
// Performs the vector operation:
// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1
// + (gain2*in_vector2[k])>>right_shifts2
//
// Input:
// - in_vector1 : Input vector 1
// - gain1 : Gain to be used for vector 1
// - right_shifts1 : Right bit shift to be used for vector 1
// - in_vector2 : Input vector 2
// - gain2 : Gain to be used for vector 2
// - right_shifts2 : Right bit shift to be used for vector 2
// - vector_length : Elements in the input vectors
//
// Output:
// - out_vector : Output vector
//
//
// WebRtcSpl_ReverseOrderMultArrayElements(...)
//
// Performs the vector operation:
// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - window : Window vector (should be reversed). The pointer
// should be set to the last value in the vector
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
//
//
// WebRtcSpl_ElementwiseVectorMult(...)
//
// Performs the vector operation:
// out_vector[n] = (in_vector[n]*window[n])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - window : Window vector.
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
//
//
// WebRtcSpl_AddVectorsAndShift(...)
//
// Performs the vector operation:
// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts
//
// Input:
// - in_vector1 : Input vector 1
// - in_vector2 : Input vector 2
// - right_shifts : Number of right bit shifts to be applied after the
// addition
// - vector_length : Number of elements in |in_vector1| and |in_vector2|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector1|)
//
//
// WebRtcSpl_AddAffineVectorToVector(...)
//
// Adds an affine transformed vector to another vector |out_vector|, i.e,
// performs
// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Gain value, used to multiply the in vector with
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well)
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
//
// Output:
// - out_vector : Vector with the output
//
//
// WebRtcSpl_AffineTransformVector(...)
//
// Affine transforms a vector, i.e, performs
// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Gain value, used to multiply the in vector with
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well)
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
//
// Output:
// - out_vector : Vector with the output
//
//
// WebRtcSpl_IncreaseSeed(...)
//
// Increases the seed (and returns the new value)
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : The new seed value
//
//
// WebRtcSpl_RandU(...)
//
// Produces a uniformly distributed value in the int16_t range
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : Uniformly distributed value in the range
// [Word16_MIN...Word16_MAX]
//
//
// WebRtcSpl_RandN(...)
//
// Produces a normal distributed value in the int16_t range
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : N(0,1) value in the Q13 domain
//
//
// WebRtcSpl_RandUArray(...)
//
// Produces a uniformly distributed vector with elements in the int16_t
// range
//
// Input:
// - vector_length : Samples wanted in the vector
// - seed : Seed for random calculation
//
// Output:
// - vector : Vector with the uniform values
// - seed : Updated seed value
//
// Return value : Number of samples in vector, i.e., |vector_length|
//
//
// WebRtcSpl_Sqrt(...)
//
// Returns the square root of the input value |value|. The precision of this
// function is integer precision, i.e., sqrt(8) gives 2 as answer.
// If |value| is a negative number then 0 is returned.
//
// Algorithm:
//
// A sixth order Taylor Series expansion is used here to compute the square
// root of a number y^0.5 = (1+x)^0.5
// where
// x = y-1
// (1+x)^0.5 ~= 1+(x/2)-0.5*(x/2)^2+0.5*(x/2)^3-0.625*(x/2)^4+0.875*(x/2)^5
// 0.5 <= x < 1
//
// Input:
// - value : Value to calculate sqrt of
//
// Return value : Result of the sqrt calculation
//
//
// WebRtcSpl_DivU32U16(...)
//
// Divides a uint32_t |num| by a uint16_t |den|.
//
// If |den|==0, (uint32_t)0xFFFFFFFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a uint32_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivW32W16(...)
//
// Divides a int32_t |num| by a int16_t |den|.
//
// If |den|==0, (int32_t)0x7FFFFFFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a int32_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivW32W16ResW16(...)
//
// Divides a int32_t |num| by a int16_t |den|, assuming that the
// result is less than 32768, otherwise an unpredictable result will occur.
//
// If |den|==0, (int16_t)0x7FFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a int16_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivResultInQ31(...)
//
// Divides a int32_t |num| by a int16_t |den|, assuming that the
// absolute value of the denominator is larger than the numerator, otherwise
// an unpredictable result will occur.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division in Q31.
//
//
// WebRtcSpl_DivW32HiLow(...)
//
// Divides a int32_t |num| by a denominator in hi, low format. The
// absolute value of the denominator has to be larger (or equal to) the
// numerator.
//
// Input:
// - num : Numerator
// - den_hi : High part of denominator
// - den_low : Low part of denominator
//
// Return value : Divided value in Q31
//
//
// WebRtcSpl_Energy(...)
//
// Calculates the energy of a vector
//
// Input:
// - vector : Vector which the energy should be calculated on
// - vector_length : Number of samples in vector
//
// Output:
// - scale_factor : Number of left bit shifts needed to get the physical
// energy value, i.e, to get the Q0 value
//
// Return value : Energy value in Q(-|scale_factor|)
//
//
// WebRtcSpl_FilterAR(...)
//
// Performs a 32-bit AR filtering on a vector in Q12
//
// Input:
// - ar_coef : AR-coefficient vector (values in Q12),
// ar_coef[0] must be 4096.
// - ar_coef_length : Number of coefficients in |ar_coef|.
// - in_vector : Vector to be filtered.
// - in_vector_length : Number of samples in |in_vector|.
// - filter_state : Current state (higher part) of the filter.
// - filter_state_length : Length (in samples) of |filter_state|.
// - filter_state_low : Current state (lower part) of the filter.
// - filter_state_low_length : Length (in samples) of |filter_state_low|.
// - out_vector_low_length : Maximum length (in samples) of
// |out_vector_low|.
//
// Output:
// - filter_state : Updated state (upper part) vector.
// - filter_state_low : Updated state (lower part) vector.
// - out_vector : Vector containing the upper part of the
// filtered values.
// - out_vector_low : Vector containing the lower part of the
// filtered values.
//
// Return value : Number of samples in the |out_vector|.
//
//
// WebRtcSpl_ComplexIFFT(...)
//
// Complex Inverse FFT
//
// Computes an inverse complex 2^|stages|-point FFT on the input vector, which
// is in bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With X as the input complex vector, y as the output complex
// vector and with M = 2^|stages|, the following is computed:
//
// M-1
// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages|
// real elements interleaved with 2^|stages| imaginary
// elements.
// [ReImReImReIm....]
// The elements are in Q(-scale) domain, see more on Return
// Value below.
//
// - stages : Number of FFT stages. Must be at least 3 and at most 10,
// since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// - mode : This parameter lets the user choose how the FFT
// should work.
// mode==0: Low-complexity and Low-accuracy mode
// mode==1: High-complexity and High-accuracy mode
//
// Output:
// - vector : Out pointer to the FFT vector (the same as input).
//
// Return Value : The scale value that tells the number of left bit shifts
// that the elements in the |vector| should be shifted with
// in order to get Q0 values, i.e. the physically correct
// values. The scale parameter is always 0 or positive,
// except if N>1024 (|stages|>10), which returns a scale
// value of -1, indicating error.
//
//
// WebRtcSpl_ComplexFFT(...)
//
// Complex FFT
//
// Computes a complex 2^|stages|-point FFT on the input vector, which is in
// bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With x as the input complex vector, Y as the output complex
// vector and with M = 2^|stages|, the following is computed:
//
// M-1
// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// This routine prevents overflow by scaling by 2 before each FFT stage. This is
// a fixed scaling, for proper normalization - there will be log2(n) passes, so
// this results in an overall factor of 1/n, distributed to maximize arithmetic
// accuracy.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary elements.
// [ReImReImReIm....]
// The output is in the Q0 domain.
//
// - stages : Number of FFT stages. Must be at least 3 and at most 10,
// since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// - mode : This parameter lets the user choose how the FFT
// should work.
// mode==0: Low-complexity and Low-accuracy mode
// mode==1: High-complexity and High-accuracy mode
//
// Output:
// - vector : The output FFT vector is in the Q0 domain.
//
// Return value : The scale parameter is always 0, except if N>1024,
// which returns a scale value of -1, indicating error.
//
//
// WebRtcSpl_AnalysisQMF(...)
//
// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The
// current version has F = 8000, therefore, a super-wideband audio signal is
// split to lower-band 0-8 kHz and upper-band 8-16 kHz.
//
// Input:
// - in_data : Wide band speech signal, 320 samples (10 ms)
//
// Input & Output:
// - filter_state1 : Filter state for first All-pass filter
// - filter_state2 : Filter state for second All-pass filter
//
// Output:
// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms)
// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency
// domain), 160 samples (10 ms)
//
//
// WebRtcSpl_SynthesisQMF(...)
//
// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F
// Hz, (current version has F = 8000 Hz). So the filter combines lower-band
// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16
// kHz audio.
//
// Input:
// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms)
// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms)
//
// Input & Output:
// - filter_state1 : Filter state for first All-pass filter
// - filter_state2 : Filter state for second All-pass filter
//
// Output:
// - out_data : Super-wideband speech signal, 0-16 kHz
//
// int16_t WebRtcSpl_SatW32ToW16(...)
//
// This function saturates a 32-bit word into a 16-bit word.
//
// Input:
// - value32 : The value of a 32-bit word.
//
// Output:
// - out16 : the saturated 16-bit word.
//
// int32_t WebRtc_MulAccumW16(...)
//
// This function multiplies a 16-bit word by a 16-bit word, and accumulates the
// product into a 32-bit integer.
//
// Input:
// - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word.
// - c : The value of a 32-bit integer.
//
// Return Value: The value of a * b + c.
//
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include "webrtc/rtc_base/compile_assert_c.h"
// Lookup table consumed by WebRtcSpl_CountLeadingZeros32_NotBuiltin(); it is
// indexed by the 6 most significant bits of the product computed there.
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
// Portable (non-builtin) count of the leading zero bits of |n|.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  unsigned int shift;
  // Smear the highest set bit of n into all lower positions, so that n becomes
  // a run of 0 bits followed by a run of 1 bits. The number of leading zeros
  // is unchanged, and only 33 distinct values of n remain possible.
  for (shift = 1; shift <= 16; shift <<= 1) {
    n |= n >> shift;
  }
  // The multiplier was selected (by exhaustive search) so that each of the 33
  // possible values of n yields a product whose top 6 bits are unique; those
  // bits index the answer table.
  return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
// Portable (non-builtin) count of the leading zero bits of a 64-bit value,
// built on top of the 32-bit variant.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
  const uint32_t upper_half = (uint32_t)(n >> 32);
  if (upper_half == 0) {
    // All 32 upper bits are zero; continue counting in the lower half.
    return 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
  }
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(upper_half);
}
// Returns the number of leading zero bits in |n|; 32 when |n| is 0.
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
  // Zero is answered here instead of being handed to the builtin.
  if (n == 0) {
    return 32;
  }
  return __builtin_clz(n);
#else
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
// Returns the number of leading zero bits in |n|; 64 when |n| is 0.
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
  // Zero is answered here instead of being handed to the builtin.
  if (n == 0) {
    return 64;
  }
  return __builtin_clzll(n);
#else
  return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
#ifdef WEBRTC_ARCH_ARM_V7
#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
#else
#if defined(MIPS32_LE)
#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
#endif
#if !defined(MIPS_DSP_R1_LE)
// Saturates the 32-bit |value32| to the int16_t range [-32768, 32767].
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  // In range: the narrowing conversion is exact.
  return (int16_t)value32;
}
// Returns a + b, saturated to [INT32_MIN, INT32_MAX].
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
  const int a_is_negative = a < 0;
  const int b_is_negative = b < 0;
  // Add as unsigned numbers, because signed overflow is undefined behavior.
  const int32_t wrapped = (int32_t)((uint32_t)a + (uint32_t)b);
  // Operands of different signs can never overflow.
  if (a_is_negative != b_is_negative) {
    return wrapped;
  }
  // With equal signs, the sum keeps that sign iff no overflow happened.
  if ((wrapped < 0) == a_is_negative) {
    return wrapped;
  }
  // Overflow: saturate in the direction given by the operands' sign.
  return a_is_negative ? INT32_MIN : INT32_MAX;
}
// Returns a - b, saturated to [INT32_MIN, INT32_MAX].
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
  const int a_is_negative = a < 0;
  const int b_is_negative = b < 0;
  // Subtract as unsigned numbers, because signed overflow is undefined
  // behavior.
  const int32_t wrapped = (int32_t)((uint32_t)a - (uint32_t)b);
  // Operands of the same sign can never overflow.
  if (a_is_negative == b_is_negative) {
    return wrapped;
  }
  // With different signs, the difference has the sign of a iff no overflow
  // happened.
  if ((wrapped < 0) == a_is_negative) {
    return wrapped;
  }
  // Overflow: saturate in the direction given by the sign of a.
  return a_is_negative ? INT32_MIN : INT32_MAX;
}
// Returns a + b, saturated to the int16_t range.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  // The exact sum of two 16-bit values always fits in 32 bits.
  const int32_t exact_sum = (int32_t)a + (int32_t)b;
  return WebRtcSpl_SatW32ToW16(exact_sum);
}
// Returns var1 - var2, saturated to the int16_t range.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  // The exact difference of two 16-bit values always fits in 32 bits.
  const int32_t exact_diff = (int32_t)var1 - (int32_t)var2;
  return WebRtcSpl_SatW32ToW16(exact_diff);
}
#endif // #if !defined(MIPS_DSP_R1_LE)
#if !defined(MIPS32_LE)
// Returns the number of bits needed to represent |n|, i.e. 32 minus its
// number of leading zero bits.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  const int leading_zeros = WebRtcSpl_CountLeadingZeros32(n);
  return 32 - leading_zeros;
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  if (a == 0) {
    return 0;
  }
  // A negative value is complemented first, so that its redundant sign bits
  // are the ones being counted.
  if (a < 0) {
    a = ~a;
  }
  // One of the leading bits is the sign bit, hence the -1.
  return WebRtcSpl_CountLeadingZeros32(a) - 1;
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  if (a == 0) {
    return 0;
  }
  // For an unsigned value every leading zero bit is available for shifting.
  return WebRtcSpl_CountLeadingZeros32(a);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  int32_t widened = a;
  if (a == 0) {
    return 0;
  }
  // A negative value is complemented first, so that its redundant sign bits
  // are the ones being counted.
  if (a < 0) {
    widened = ~widened;
  }
  // 17 = the 16 extra bits of the 32-bit count plus the 16-bit sign bit.
  return WebRtcSpl_CountLeadingZeros32(widened) - 17;
}
// Multiplies the 16-bit words |a| and |b| and adds the 32-bit |c|;
// returns a * b + c.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  const int product = a * b;
  return product + c;
}
#endif // #if !defined(MIPS32_LE)
#endif // WEBRTC_ARCH_ARM_V7
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16C()
* WebRtcSpl_MaxAbsValueW32C()
* WebRtcSpl_MaxValueW16C()
* WebRtcSpl_MaxValueW32C()
* WebRtcSpl_MinValueW16C()
* WebRtcSpl_MinValueW32C()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinIndexW32()
*
*/
#include <stdlib.h>
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
// TODO(kma): Move the next six functions into min_max_operations_c.c.
// Maximum absolute value of word16 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT). The result is
// capped at WEBRTC_SPL_WORD16_MAX.
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
  int peak = 0;
  size_t pos = 0;
  RTC_DCHECK_GT(length, 0);
  while (pos < length) {
    // Magnitudes are computed in int, where abs(-32768) is representable.
    const int magnitude = abs((int)vector[pos]);
    if (magnitude > peak) {
      peak = magnitude;
    }
    ++pos;
  }
  // abs(-32768) does not fit in int16_t; cap it at the largest int16_t value.
  if (peak > WEBRTC_SPL_WORD16_MAX) {
    return (int16_t)WEBRTC_SPL_WORD16_MAX;
  }
  return (int16_t)peak;
}
// Maximum absolute value of word32 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT). The result is
// capped at WEBRTC_SPL_WORD32_MAX, which is what an input element of
// 0x80000000 (the most negative int32_t) produces.
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
  // Use uint32_t for the local variables, to accommodate the magnitude of
  // 0x80000000, which is 0x80000000.
  uint32_t absolute = 0, maximum = 0;
  size_t i = 0;
  RTC_DCHECK_GT(length, 0);
  for (i = 0; i < length; i++) {
    // Negate in unsigned arithmetic instead of calling abs(): abs() has
    // undefined behavior for the most negative value, because the result is
    // not representable in int. Unsigned negation is well defined and yields
    // 0x80000000 for that input.
    absolute = (vector[i] < 0) ? (uint32_t)0 - (uint32_t)vector[i]
                               : (uint32_t)vector[i];
    if (absolute > maximum) {
      maximum = absolute;
    }
  }
  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
  return (int32_t)maximum;
}
// Maximum value of word16 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT).
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
  // Start from the smallest int16_t so that any element can replace it.
  int16_t largest = WEBRTC_SPL_WORD16_MIN;
  size_t pos = 0;
  RTC_DCHECK_GT(length, 0);
  while (pos < length) {
    if (vector[pos] > largest) {
      largest = vector[pos];
    }
    ++pos;
  }
  return largest;
}
// Maximum value of word32 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT).
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
  // Start from the smallest int32_t so that any element can replace it.
  int32_t largest = WEBRTC_SPL_WORD32_MIN;
  size_t pos = 0;
  RTC_DCHECK_GT(length, 0);
  while (pos < length) {
    if (vector[pos] > largest) {
      largest = vector[pos];
    }
    ++pos;
  }
  return largest;
}
// Minimum value of word16 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT).
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
  // Start from the largest int16_t so that any element can replace it.
  int16_t smallest = WEBRTC_SPL_WORD16_MAX;
  size_t pos = 0;
  RTC_DCHECK_GT(length, 0);
  while (pos < length) {
    if (vector[pos] < smallest) {
      smallest = vector[pos];
    }
    ++pos;
  }
  return smallest;
}
// Minimum value of word32 vector. C version for generic platforms.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT).
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
  // Start from the largest int32_t so that any element can replace it.
  int32_t smallest = WEBRTC_SPL_WORD32_MAX;
  size_t pos = 0;
  RTC_DCHECK_GT(length, 0);
  while (pos < length) {
    if (vector[pos] < smallest) {
      smallest = vector[pos];
    }
    ++pos;
  }
  return smallest;
}
// Index of maximum absolute value in a word16 vector.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT). When several
// elements share the largest magnitude, the lowest index is returned.
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
  // Magnitudes are kept in int, to accommodate the value of abs(-32768).
  int peak = 0;
  size_t peak_index = 0;
  size_t pos;
  RTC_DCHECK_GT(length, 0);
  for (pos = 0; pos < length; ++pos) {
    const int magnitude = abs((int)vector[pos]);
    if (magnitude <= peak) {
      continue;
    }
    peak = magnitude;
    peak_index = pos;
  }
  return peak_index;
}
// Index of maximum value in a word16 vector.
// |length| is expected to be > 0 (checked with RTC_DCHECK_GT). When the
// maximum occurs more than once, the lowest index is returned.
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
  int16_t largest = WEBRTC_SPL_WORD16_MIN;
  size_t largest_index = 0;
  size_t pos;
  RTC_DCHECK_GT(length, 0);
  for (pos = 0; pos < length; ++pos) {
    // Strict comparison: an equal value never displaces an earlier index.
    if (vector[pos] <= largest) {
      continue;
    }
    largest = vector[pos];
    largest_index = pos;
  }
  return largest_index;
}
// Position of the largest element in a word32 vector (first one on ties).
size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
  int32_t best = WEBRTC_SPL_WORD32_MIN;
  size_t best_pos = 0;
  size_t pos = 0;

  RTC_DCHECK_GT(length, 0);

  while (pos < length) {
    const int32_t sample = vector[pos];
    // Only a strictly larger sample replaces the current winner.
    if (sample > best) {
      best_pos = pos;
      best = sample;
    }
    ++pos;
  }
  return best_pos;
}
// Position of the smallest element in a word16 vector (first one on ties).
size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
  int16_t best = WEBRTC_SPL_WORD16_MAX;
  size_t best_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    // Only a strictly smaller sample replaces the current winner.
    if (vector[pos] >= best) {
      continue;
    }
    best = vector[pos];
    best_pos = pos;
  }
  return best_pos;
}
// Position of the smallest element in a word32 vector (first one on ties).
size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
  int32_t best = WEBRTC_SPL_WORD32_MAX;
  size_t best_pos = 0;
  size_t pos = 0;

  RTC_DCHECK_GT(length, 0);

  while (pos < length) {
    const int32_t sample = vector[pos];
    // Only a strictly smaller sample replaces the current winner.
    if (sample < best) {
      best_pos = pos;
      best = sample;
    }
    ++pos;
  }
  return best_pos;
}
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains resampling functions between 48 kHz and nb/wb.
* The description header can be found in signal_processing_library.h
*
*/
#include <string.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
////////////////////////////
///// 48 kHz -> 16 kHz /////
////////////////////////////
// 48 kHz -> 16 kHz resampler.
// Runs three stages on one block: low-pass at 48 kHz, fractional resampling
// 48 -> 32 kHz, then decimation by two down to 16 kHz.
//   in:     480 input samples (int16_t)
//   out:    160 output samples (int16_t)
//   state:  filter memories carried from one call to the next
//   tmpmem: int32_t scratch buffer; elements up to tmpmem[495] are touched
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
{
    // The fractional resampler reads 8 history samples directly in front of
    // the freshly filtered block, so that block is written 8 slots further in.
    int32_t* const frac_in = tmpmem + 8;
    int32_t* const lp_out = tmpmem + 16;
    int32_t* const lp_tail = tmpmem + 488;  // last 8 of the 480 low-passed samples

    // Stage 1: 48 kHz low-pass, int16_t[480] -> int32_t[480].
    WebRtcSpl_LPBy2ShortToInt(in, 480, lp_out, state->S_48_48);

    // Stage 2: 48 -> 32 kHz, int32_t[480] -> int32_t[320].
    // Bring back the history saved by the previous call, then save the tail
    // of this block for the next one.
    memcpy(frac_in, state->S_48_32, 8 * sizeof(int32_t));
    memcpy(state->S_48_32, lp_tail, 8 * sizeof(int32_t));
    WebRtcSpl_Resample48khzTo32khz(frac_in, tmpmem, 160);

    // Stage 3: 32 -> 16 kHz, int32_t[320] -> int16_t[160].
    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
}
// Zeroes every filter memory of the 48 kHz -> 16 kHz resampler state.
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
{
    // Number of int32_t words cleared per stage.
    enum { kLowPassWords = 16, kStageWords = 8 };

    memset(state->S_48_48, 0, kLowPassWords * sizeof(int32_t));
    memset(state->S_48_32, 0, kStageWords * sizeof(int32_t));
    memset(state->S_32_16, 0, kStageWords * sizeof(int32_t));
}
////////////////////////////
///// 16 kHz -> 48 kHz /////
////////////////////////////
// 16 kHz -> 48 kHz resampler.
// Runs three stages on one block: interpolation by two to 32 kHz, fractional
// resampling 32 -> 24 kHz, then interpolation by two up to 48 kHz.
//   in:     160 input samples (int16_t)
//   out:    480 output samples (int16_t)
//   state:  filter memories carried from one call to the next
//   tmpmem: int32_t scratch buffer; elements up to tmpmem[335] are touched
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
{
    // The fractional resampler reads 8 history samples directly in front of
    // the upsampled block, so that block is written 8 slots further in.
    int32_t* const frac_in = tmpmem + 8;
    int32_t* const up_out = tmpmem + 16;
    int32_t* const up_tail = tmpmem + 328;  // last 8 of the 320 upsampled samples

    // Stage 1: 16 -> 32 kHz, int16_t[160] -> int32_t[320].
    WebRtcSpl_UpBy2ShortToInt(in, 160, up_out, state->S_16_32);

    // Stage 2: 32 -> 24 kHz, int32_t[320] -> int32_t[240].
    // Bring back the history saved by the previous call, then save the tail
    // of this block for the next one.
    memcpy(frac_in, state->S_32_24, 8 * sizeof(int32_t));
    memcpy(state->S_32_24, up_tail, 8 * sizeof(int32_t));
    WebRtcSpl_Resample32khzTo24khz(frac_in, tmpmem, 80);

    // Stage 3: 24 -> 48 kHz, int32_t[240] -> int16_t[480].
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// Zeroes every filter memory of the 16 kHz -> 48 kHz resampler state.
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
{
    // Number of int32_t words cleared per stage.
    enum { kStageWords = 8 };

    memset(state->S_16_32, 0, kStageWords * sizeof(int32_t));
    memset(state->S_32_24, 0, kStageWords * sizeof(int32_t));
    memset(state->S_24_48, 0, kStageWords * sizeof(int32_t));
}
////////////////////////////
///// 48 kHz -> 8 kHz /////
////////////////////////////
// 48 kHz -> 8 kHz resampler.
// Runs four stages on one block: decimation by two to 24 kHz, low-pass at
// 24 kHz, fractional resampling by 2/3 down to 16 kHz, then decimation by two
// down to 8 kHz.
//   in:     480 input samples (int16_t)
//   out:    80 output samples (int16_t)
//   state:  filter memories carried from one call to the next
//   tmpmem: int32_t scratch buffer; elements up to tmpmem[495] are touched
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
{
    // The fractional resampler reads 8 history samples directly in front of
    // the low-passed block, so that block is written 8 slots further in.
    int32_t* const frac_in = tmpmem + 8;
    int32_t* const lp_out = tmpmem + 16;
    int32_t* const lp_tail = tmpmem + 248;  // last 8 of the 240 low-passed samples
    int32_t* const down_out = tmpmem + 256;

    // Stage 1: 48 -> 24 kHz, int16_t[480] -> int32_t[240].
    WebRtcSpl_DownBy2ShortToInt(in, 480, down_out, state->S_48_24);

    // Stage 2: 24 kHz low-pass, int32_t[240] -> int32_t[240].
    WebRtcSpl_LPBy2IntToInt(down_out, 240, lp_out, state->S_24_24);

    // Stage 3: 24 -> 16 kHz, int32_t[240] -> int32_t[160] (the 2/3-ratio
    // resampler is reused here).
    // Bring back the history saved by the previous call, then save the tail
    // of this block for the next one.
    memcpy(frac_in, state->S_24_16, 8 * sizeof(int32_t));
    memcpy(state->S_24_16, lp_tail, 8 * sizeof(int32_t));
    WebRtcSpl_Resample48khzTo32khz(frac_in, tmpmem, 80);

    // Stage 4: 16 -> 8 kHz, int32_t[160] -> int16_t[80].
    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}
// Zeroes every filter memory of the 48 kHz -> 8 kHz resampler state.
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
{
    // Number of int32_t words cleared per stage.
    enum { kLowPassWords = 16, kStageWords = 8 };

    memset(state->S_48_24, 0, kStageWords * sizeof(int32_t));
    memset(state->S_24_24, 0, kLowPassWords * sizeof(int32_t));
    memset(state->S_24_16, 0, kStageWords * sizeof(int32_t));
    memset(state->S_16_8, 0, kStageWords * sizeof(int32_t));
}
////////////////////////////
///// 8 kHz -> 48 kHz /////
////////////////////////////
// 8 kHz -> 48 kHz resampler.
// Runs four stages on one block: interpolation by two to 16 kHz, fractional
// resampling by 3/4 down to 12 kHz, then two interpolations by two (to 24 kHz
// and to 48 kHz).
//   in:     80 input samples (int16_t)
//   out:    480 output samples (int16_t)
//   state:  filter memories carried from one call to the next
//   tmpmem: int32_t scratch buffer; elements up to tmpmem[423] are touched
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
{
    // The fractional resampler reads 8 history samples directly in front of
    // the upsampled block, so that block is written 8 slots further in.
    int32_t* const frac_out = tmpmem + 240;
    int32_t* const frac_in = tmpmem + 256;
    int32_t* const up_out = tmpmem + 264;
    int32_t* const up_tail = tmpmem + 416;  // last 8 of the 160 upsampled samples

    // Stage 1: 8 -> 16 kHz, int16_t[80] -> int32_t[160].
    WebRtcSpl_UpBy2ShortToInt(in, 80, up_out, state->S_8_16);

    // Stage 2: 16 -> 12 kHz, int32_t[160] -> int32_t[120] (the 3/4-ratio
    // resampler is reused here).
    // Bring back the history saved by the previous call, then save the tail
    // of this block for the next one.
    memcpy(frac_in, state->S_16_12, 8 * sizeof(int32_t));
    memcpy(state->S_16_12, up_tail, 8 * sizeof(int32_t));
    WebRtcSpl_Resample32khzTo24khz(frac_in, frac_out, 40);

    // Stage 3: 12 -> 24 kHz, int32_t[120] -> int32_t[240].
    WebRtcSpl_UpBy2IntToInt(frac_out, 120, tmpmem, state->S_12_24);

    // Stage 4: 24 -> 48 kHz, int32_t[240] -> int16_t[480].
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// Zeroes every filter memory of the 8 kHz -> 48 kHz resampler state.
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
{
    // Number of int32_t words cleared per stage.
    enum { kStageWords = 8 };

    memset(state->S_8_16, 0, kStageWords * sizeof(int32_t));
    memset(state->S_16_12, 0, kStageWords * sizeof(int32_t));
    memset(state->S_12_24, 0, kStageWords * sizeof(int32_t));
    memset(state->S_24_48, 0, kStageWords * sizeof(int32_t));
}
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of some internal resampling functions.
*
*/
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
#include "webrtc/rtc_base/sanitizer.h"
// Allpass filter coefficients, one row per polyphase branch.
// The filters in this file use row 1 for their "lower" branch and row 0 for
// their "upper" branch; the three entries of a row are applied one after the
// other by the three cascaded sections of that branch.
static const int16_t kResampleAllpass[2][3] = {
    /* row 0: upper branch */ {  821, 6110, 12382 },
    /* row 1: lower branch */ { 3050, 9368, 15063 },
};
//
// decimator
// Halves the sample rate: the even and the odd input samples each run through
// a three-section allpass branch and the two branch outputs are averaged.
// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
// output: int16_t (saturated) (of length len/2)
// state: filter state array; length = 8
//        state[0..3] are used by the lower branch, state[4..7] by the upper one.
// len: number of input samples; it is halved on entry.
// NOTE(review): the combine loop at the end produces two outputs per
// iteration, so when len/2 is odd it reads in[] and writes out[] one pair
// further than the len/2 outputs - presumably callers only pass lengths that
// are multiples of 4; confirm.
void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
int32_t *state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// from here on len is the number of output samples
len >>= 1;
// lower allpass filter (operates on even input samples)
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[1];
// UBSan: -1771017321 - 999586185 cannot be represented in type 'int'
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// divide by two and store temporarily
// (the even input slot is reused to hold the branch output)
in[i << 1] = (state[3] >> 1);
}
// step to the odd input samples
in++;
// upper allpass filter (operates on odd input samples)
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// divide by two and store temporarily
// (the odd input slot is reused to hold the branch output)
in[i << 1] = (state[7] >> 1);
}
// back to the start of the buffer
in--;
// combine allpass outputs
// (two output samples are produced per iteration)
for (i = 0; i < len; i += 2)
{
// divide by two, add both allpass outputs and round
tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
// saturate the first sample of the pair to the int16_t range
if (tmp0 > (int32_t)0x00007FFF)
tmp0 = 0x00007FFF;
if (tmp0 < (int32_t)0xFFFF8000)
tmp0 = 0xFFFF8000;
out[i] = (int16_t)tmp0;
// saturate the second sample of the pair to the int16_t range
if (tmp1 > (int32_t)0x00007FFF)
tmp1 = 0x00007FFF;
if (tmp1 < (int32_t)0xFFFF8000)
tmp1 = 0xFFFF8000;
out[i + 1] = (int16_t)tmp1;
}
}
//
// decimator
// Halves the sample rate: the even and the odd input samples each run through
// a three-section allpass branch; each output is the sum of the two halved
// branch outputs.
// input: int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state: filter state array; length = 8
//        state[0..3] are used by the lower branch, state[4..7] by the upper one.
// len: number of input samples; it is halved on entry.
void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
int32_t len,
int32_t *out,
int32_t *state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// from here on len is the number of output samples
len >>= 1;
// lower allpass filter (operates on even input samples)
for (i = 0; i < len; i++)
{
// bring the sample to the internal format: shift left by 15 and add 16384
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// UBSan: -1379909682 - 834099714 cannot be represented in type 'int'
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// divide by two and store temporarily
out[i] = (state[3] >> 1);
}
// step to the odd input samples
in++;
// upper allpass filter (operates on odd input samples)
for (i = 0; i < len; i++)
{
// bring the sample to the internal format: shift left by 15 and add 16384
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// divide by two and add to the lower-branch result stored above
out[i] += (state[7] >> 1);
}
// restores the local pointer only; it is not used afterwards
in--;
}
//
// interpolator
// Doubles the sample rate: every input sample is fed to both three-section
// allpass branches and each branch contributes one of the two output samples.
// input: int16_t
// output: int32_t (normalized, not saturated) (of length len*2)
// state: filter state array; length = 8
//        state[4..7] are used by the upper branch, state[0..3] by the lower one.
// len: number of input samples
void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
int32_t *state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// upper allpass filter (generates odd output samples)
// (counting from 1: the results land in out[0], out[2], ...)
for (i = 0; i < len; i++)
{
// bring the sample to the internal format: shift left by 15 and add 16384
tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// scale down, round and store
out[i << 1] = state[7] >> 15;
}
// step to the interleaved slots left free by the first loop
out++;
// lower allpass filter (generates even output samples)
// (counting from 1: relative to the original pointer these are out[1], out[3], ...)
for (i = 0; i < len; i++)
{
// bring the sample to the internal format: shift left by 15 and add 16384
tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// scale down, round and store
out[i << 1] = state[3] >> 15;
}
}
//
// interpolator
// Doubles the sample rate: every input sample is fed to both three-section
// allpass branches and each branch contributes one of the two output samples.
// input: int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
// state: filter state array; length = 8
//        state[4..7] are used by the upper branch, state[0..3] by the lower one.
// len: number of input samples
void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
int32_t *state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// upper allpass filter (generates odd output samples)
// (counting from 1: the results land in out[0], out[2], ...)
for (i = 0; i < len; i++)
{
tmp0 = in[i];
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// store as is; no scaling is applied, the output keeps the input format
out[i << 1] = state[7];
}
// step to the interleaved slots left free by the first loop
out++;
// lower allpass filter (generates even output samples)
// (counting from 1: relative to the original pointer these are out[1], out[3], ...)
for (i = 0; i < len; i++)
{
tmp0 = in[i];
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// store as is; no scaling is applied, the output keeps the input format
out[i << 1] = state[3];
}
}
//
// interpolator
// Doubles the sample rate: every input sample is fed to both three-section
// allpass branches and each branch contributes one of the two output samples.
// input: int32_t (shifted 15 positions to the left, + offset 16384)
// output: int16_t (saturated) (of length len*2)
// state: filter state array; length = 8
//        state[4..7] are used by the upper branch, state[0..3] by the lower one.
// len: number of input samples
void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
int32_t *state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// upper allpass filter (generates odd output samples)
// (counting from 1: the results land in out[0], out[2], ...)
for (i = 0; i < len; i++)
{
tmp0 = in[i];
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// scale down, saturate and store
tmp1 = state[7] >> 15;
if (tmp1 > (int32_t)0x00007FFF)
tmp1 = 0x00007FFF;
if (tmp1 < (int32_t)0xFFFF8000)
tmp1 = 0xFFFF8000;
out[i << 1] = (int16_t)tmp1;
}
// step to the interleaved slots left free by the first loop
out++;
// lower allpass filter (generates even output samples)
// (counting from 1: relative to the original pointer these are out[1], out[3], ...)
for (i = 0; i < len; i++)
{
tmp0 = in[i];
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// scale down, saturate and store
tmp1 = state[3] >> 15;
if (tmp1 > (int32_t)0x00007FFF)
tmp1 = 0x00007FFF;
if (tmp1 < (int32_t)0xFFFF8000)
tmp1 = 0xFFFF8000;
out[i << 1] = (int16_t)tmp1;
}
}
// lowpass filter
// Keeps the sample rate: every output is the average of two three-section
// allpass branches. Four filter passes are made, two for the even and two for
// the odd output samples.
// input: int16_t
// output: int32_t (normalized, not saturated)
// state: filter state array; length = 16 (indices 0..15 are accessed)
//        state[0..3] and state[4..7] serve the even outputs,
//        state[8..11] and state[12..15] serve the odd outputs.
// len: number of input samples; it is halved on entry and 2 * (len / 2)
//      output samples are written.
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
int32_t* state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// from here on len is the number of even/odd sample pairs
len >>= 1;
// lower allpass filter: odd input -> even output samples
in++;
// initial state of polyphase delay element
// (state[12] holds the last odd input sample seen by the previous call; it
// is written by the fourth loop below)
tmp0 = state[12];
for (i = 0; i < len; i++)
{
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// divide by two and store temporarily (completed by the next loop)
out[i << 1] = state[3] >> 1;
// fetch the odd input sample that is filtered in the next iteration
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
}
// back to the even input samples
in--;
// upper allpass filter: even input -> even output samples
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
}
// switch to odd output samples
out++;
// lower allpass filter: even input -> odd output samples
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[9];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[8] + diff * kResampleAllpass[1][0];
state[8] = tmp0;
diff = tmp1 - state[10];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[9] + diff * kResampleAllpass[1][1];
state[9] = tmp1;
diff = tmp0 - state[11];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[11] = state[10] + diff * kResampleAllpass[1][2];
state[10] = tmp0;
// divide by two and store temporarily (completed by the next loop)
out[i << 1] = state[11] >> 1;
}
// upper allpass filter: odd input -> odd output samples
in++;
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[13];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[12] + diff * kResampleAllpass[0][0];
// also serves as the polyphase delay element read at the top of the next call
state[12] = tmp0;
diff = tmp1 - state[14];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[13] + diff * kResampleAllpass[0][1];
state[13] = tmp1;
diff = tmp0 - state[15];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[15] = state[14] + diff * kResampleAllpass[0][2];
state[14] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
}
}
// lowpass filter
// Keeps the sample rate: every output is the average of two three-section
// allpass branches. Four filter passes are made, two for the even and two for
// the odd output samples.
// input: int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state: filter state array; length = 16 (indices 0..15 are accessed)
//        state[0..3] and state[4..7] serve the even outputs,
//        state[8..11] and state[12..15] serve the odd outputs.
// len: number of input samples; it is halved on entry and 2 * (len / 2)
//      output samples are written.
void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
int32_t* state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
// from here on len is the number of even/odd sample pairs
len >>= 1;
// lower allpass filter: odd input -> even output samples
in++;
// initial state of polyphase delay element
// (state[12] holds the last odd input sample seen by the previous call; it
// is written by the fourth loop below)
tmp0 = state[12];
for (i = 0; i < len; i++)
{
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// divide by two and store temporarily (completed by the next loop)
out[i << 1] = state[3] >> 1;
// fetch the odd input sample that is filtered in the next iteration
tmp0 = in[i << 1];
}
// back to the even input samples
in--;
// upper allpass filter: even input -> even output samples
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[5];
// UBSan: -794814117 - 1566149201 cannot be represented in type 'int'
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
}
// switch to odd output samples
out++;
// lower allpass filter: even input -> odd output samples
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[9];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[8] + diff * kResampleAllpass[1][0];
state[8] = tmp0;
diff = tmp1 - state[10];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[9] + diff * kResampleAllpass[1][1];
state[9] = tmp1;
diff = tmp0 - state[11];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[11] = state[10] + diff * kResampleAllpass[1][2];
state[10] = tmp0;
// divide by two and store temporarily (completed by the next loop)
out[i << 1] = state[11] >> 1;
}
// upper allpass filter: odd input -> odd output samples
in++;
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[13];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[12] + diff * kResampleAllpass[0][0];
// also serves as the polyphase delay element read at the top of the next call
state[12] = tmp0;
diff = tmp1 - state[14];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[13] + diff * kResampleAllpass[0][1];
state[13] = tmp1;
diff = tmp0 - state[15];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[15] = state[14] + diff * kResampleAllpass[0][2];
state[14] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
}
}
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file contains some internal resampling functions.
*
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#include <stdint.h>
/*******************************************************************
* resample_by_2_fast.c
* Functions for internal use in the other resample functions
******************************************************************/
// Decimator by two, int32_t -> int16_t (saturated).
// |in| holds |len| samples and is overwritten; |out| receives len/2 samples.
// |state| is accessed at indices 0..7.
void WebRtcSpl_DownBy2IntToShort(int32_t* in,
int32_t len,
int16_t* out,
int32_t* state);
// Decimator by two, int16_t -> int32_t.
// |in| holds |len| samples; |out| receives len/2 samples.
// |state| is accessed at indices 0..7.
void WebRtcSpl_DownBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
// Interpolator by two, int16_t -> int32_t (not saturated).
// |in| holds |len| samples; |out| receives len*2 samples.
// |state| is accessed at indices 0..7.
void WebRtcSpl_UpBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
// Interpolator by two, int32_t -> int32_t.
// |in| holds |len| samples; |out| receives len*2 samples.
// |state| is accessed at indices 0..7.
void WebRtcSpl_UpBy2IntToInt(const int32_t* in,
int32_t len,
int32_t* out,
int32_t* state);
// Interpolator by two, int32_t -> int16_t (saturated).
// |in| holds |len| samples; |out| receives len*2 samples.
// |state| is accessed at indices 0..7.
void WebRtcSpl_UpBy2IntToShort(const int32_t* in,
int32_t len,
int16_t* out,
int32_t* state);
// Lowpass filter at unchanged rate, int16_t -> int32_t (not saturated).
// |in| holds |len| samples. |state| is accessed at indices 0..15.
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
// Lowpass filter at unchanged rate, int32_t -> int32_t (not saturated).
// |in| holds |len| samples. |state| is accessed at indices 0..15.
void WebRtcSpl_LPBy2IntToInt(const int32_t* in,
int32_t len,
int32_t* out,
int32_t* state);
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions between 48, 44, 32 and 24 kHz.
* The description headers can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Interpolation coefficients.
// Each row holds the taps of one output phase of a fractional resampler.

// 48 kHz -> 32 kHz: two output phases, 8 taps each.
// Row 1 is row 0 in reverse order.
static const int16_t kCoefficients48To32[2][8] = {
    { 778, -2050,  1087, 23285, 12903, -3783,   441, 222 },
    { 222,   441, -3783, 12903, 23285,  1087, -2050, 778 },
};

// 32 kHz -> 24 kHz: three output phases, 8 taps each.
// Row 2 is row 0 in reverse order; row 1 is symmetric.
static const int16_t kCoefficients32To24[3][8] = {
    { 767, -2362,  2434, 24406, 10620, -3838,   721,  90 },
    { 386,  -381, -2646, 19062, 19062, -2646,  -381, 386 },
    {  90,   721, -3838, 10620, 24406,  2434, -2362, 767 },
};

// 44 kHz -> 32 kHz: four coefficient sets, 9 taps each.
static const int16_t kCoefficients44To32[4][9] = {
    {  117, -669,  2245, -6183, 26267, 13529, -3245,  845, -138 },
    { -101,  612, -2283,  8532, 29790, -5138,  1789, -524,   91 },
    {   50, -292,  1016, -3064, 32010,  3933, -1147,  315,  -53 },
    { -156,  974, -3863, 18603, 21691, -6246,  2353, -712,  126 },
};
// Resampling ratio: 2/3
// input:  int32_t (normalized, not saturated) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
// K:      number of blocks
//
// Every block turns 3 input samples into 2 output samples. Output p of a
// block is 16384 plus the 8-tap inner product of kCoefficients48To32[p] with
// the inputs In[p] .. In[p + 7], so each block reads up to In[8].
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t block;

    for (block = 0; block < K; block++)
    {
        int phase;

        // Each output is stored before the next one is computed.
        for (phase = 0; phase < 2; phase++)
        {
            const int16_t *taps = kCoefficients48To32[phase];
            const int32_t *window = In + phase;
            int32_t acc = 1 << 14;
            int tap;

            for (tap = 0; tap < 8; tap++)
            {
                acc += taps[tap] * window[tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += 3;
        Out += 2;
    }
}
// Resampling ratio: 3/4
// input:  int32_t (normalized, not saturated) :: size 4 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
// K:      number of blocks
//
// Every block turns 4 input samples into 3 output samples. Output p of a
// block is 16384 plus the 8-tap inner product of kCoefficients32To24[p] with
// the inputs In[p] .. In[p + 7], so each block reads up to In[9].
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t block;

    for (block = 0; block < K; block++)
    {
        int phase;

        // Each output is stored before the next one is computed.
        for (phase = 0; phase < 3; phase++)
        {
            const int16_t *taps = kCoefficients32To24[phase];
            const int32_t *window = In + phase;
            int32_t acc = 1 << 14;
            int tap;

            for (tap = 0; tap < 8; tap++)
            {
                acc += taps[tap] * window[tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += 4;
        Out += 3;
    }
}
//
// fractional resampling filters
// Fout = 11/16 * Fin
// Fout = 8/11 * Fin
//
// Computes two 9-tap inner products that share one coefficient set and
// stores them to the two output locations:
//   *out1 = 16384 + sum(coef_ptr[k] * in1[k]),   k = 0..8  (in1 read forwards)
//   *out2 = 16384 + sum(coef_ptr[k] * in2[-k]),  k = 0..8  (in2 read backwards)
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
                                       const int16_t *coef_ptr, int32_t *out1,
                                       int32_t *out2)
{
    int32_t forward_acc = 16384;
    int32_t backward_acc = 16384;
    int16_t last_coef;
    int k;

    // Taps 0..7 only read; no output is written until both sums are ready.
    for (k = 0; k < 8; k++)
    {
        const int16_t coef = coef_ptr[k];
        forward_acc += coef * in1[k];
        backward_acc += coef * in2[-k];
    }

    // The last tap is folded into the stores, forward result first.
    last_coef = coef_ptr[8];
    *out1 = forward_acc + last_coef * in1[8];
    *out2 = backward_acc + last_coef * in2[-8];
}
// Resampling ratio: 8/11
// input:  int32_t (normalized, not saturated) :: size 11 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
// K:      number of blocks
//
// Every block turns 11 input samples into 8 output samples and reads up to
// In[17]. Output 0 is a shifted copy of In[3], output 4 uses coefficient set 3
// directly, and the remaining six outputs are produced in pairs by
// WebRtcSpl_ResampDotProduct() with coefficient sets 0..2.
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t block;

    for (block = 0; block < K; block++)
    {
        const int16_t *center_taps = kCoefficients44To32[3];
        int32_t acc = 1 << 14;
        int tap;

        // first output sample: In[3] moved to the output format
        Out[0] = ((int32_t)In[3] << 15) + acc;

        // output 4: 9-tap inner product over In[5] .. In[13]
        for (tap = 0; tap < 9; tap++)
        {
            acc += center_taps[tap] * In[5 + tap];
        }
        Out[4] = acc;

        // outputs 1 and 7
        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
        // outputs 2 and 6
        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
        // outputs 3 and 5
        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);

        // advance to the next block
        In += 11;
        Out += 8;
    }
}
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* The global function contained in this file initializes SPL function
* pointers, currently only for ARM platforms.
*
* Some code came from common/rtcd.c in the WebM project.
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
/* Declare function pointers. */
/* Each of these is assigned by one of the InitPointersTo*() routines in this
 * file, which select the generic C, Neon or MIPS implementation. */
MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
MaxValueW16 WebRtcSpl_MaxValueW16;
MaxValueW32 WebRtcSpl_MaxValueW32;
MinValueW16 WebRtcSpl_MinValueW16;
MinValueW32 WebRtcSpl_MinValueW32;
CrossCorrelation WebRtcSpl_CrossCorrelation;
DownsampleFast WebRtcSpl_DownsampleFast;
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
#if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE)
/* Point every SPL function pointer at its generic C implementation. */
static void InitPointersToC(void) {
  /* Vector kernels. */
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;

  /* Minimum searches. */
  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;

  /* Maximum searches. */
  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
}
#endif
#if defined(WEBRTC_HAS_NEON)
/* Points the SPL dispatch pointers at the Neon implementations. The
 * scale-and-add-with-round kernel is assigned its C implementation here. */
static void InitPointersToNeon(void) {
  /* Minimum / maximum scans. */
  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;

  /* Vector kernels. */
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
}
#endif
#if defined(MIPS32_LE)
/* Points the SPL dispatch pointers at the MIPS implementations. Two of the
 * kernels have a MIPS version only when MIPS_DSP_R1_LE is defined; otherwise
 * they are assigned the C implementation. */
static void InitPointersToMIPS(void) {
#if defined(MIPS_DSP_R1_LE)
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
#else
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
#endif

  /* Assigned unconditionally for every MIPS32_LE build. */
  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
}
#endif
/* Calls the one initializer that matches the build configuration. Neon takes
 * precedence over MIPS when both macros are defined; the generic C version is
 * used when neither is. */
static void InitFunctionPointers(void) {
#if !defined(WEBRTC_HAS_NEON) && !defined(MIPS32_LE)
  InitPointersToC();
#elif defined(WEBRTC_HAS_NEON)
  InitPointersToNeon();
#else /* MIPS32_LE without WEBRTC_HAS_NEON */
  InitPointersToMIPS();
#endif
}
#if defined(WEBRTC_POSIX)
#include <pthread.h>
/* Runs |func| through pthread_once() using a single control object shared by
 * all calls to this function. */
static void once(void (*func)(void)) {
  static pthread_once_t once_control = PTHREAD_ONCE_INIT;

  (void)pthread_once(&once_control, func);
}
#elif defined(_WIN32)
#include <windows.h>
/* Runs |func| the first time this function is called; later calls only take
 * and release the lock. */
static void once(void (*func)(void)) {
  /* The critical section is statically initialized rather than set up with
   * InitializeCriticalSection(), since there's no race-free context in which
   * to execute that call.
   *
   * TODO(kma): Change to different implementation (e.g.
   * InterlockedCompareExchangePointer) to avoid issues similar to
   * http://code.google.com/p/webm/issues/detail?id=467.
   */
  static CRITICAL_SECTION init_guard = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
  static int already_run = 0;

  EnterCriticalSection(&init_guard);
  if (already_run == 0) {
    func();
    already_run = 1;
  }
  LeaveCriticalSection(&init_guard);
}
/* There's no fallback version as an #else block here to ensure thread safety.
* In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
* system should pick it up.
*/
#endif /* WEBRTC_POSIX */
/* Public entry point: assigns the SPL function pointers by passing
 * InitFunctionPointers() to once(), so repeated calls do not redo the
 * assignment. */
void WebRtcSpl_Init(void) {
once(InitFunctionPointers);
}
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
// index (n * 0x8c0b2891) >> 26 in this table gives the number of leading zero
// bits in n. For example n = 0 maps to index 0 (32 zeros) and n = 0xFFFFFFFF
// maps to index 28 (0 zeros).
// NOTE(review): the -1 entries are presumably indices that no such n can
// produce - confirm against the lookup code.
const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24,
4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9,
-1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12,
};
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Sqrt().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Forward declaration. The function is not declared static, so it has
// external linkage.
int32_t WebRtcSpl_SqrtLocal(int32_t in);
// Polynomial core of WebRtcSpl_Sqrt(): evaluates the series described in the
// block comment below on |in| and adds a rounding offset of 32768 so that the
// caller can use the upper 16 bits of the result.
//
// - in [i] : Input value. WebRtcSpl_Sqrt() passes a non-negative value whose
// lower 16 bits are zero.
//
// returns : The series value t (see below) plus the rounding offset.
int32_t WebRtcSpl_SqrtLocal(int32_t in)
{
int16_t x_half, t16;
int32_t A, B, x2;
/* The following block performs:
y=in/2
x=y-2^30
x_half=x/2^31
t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+ 0.875*((x_half)^5)
*/
B = in / 2; // B = y = in/2
B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2
B = B + ((int32_t)0x40000000); // B = 1 + x/2
B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
A = -x2; // A = -(x/2)^2
B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
A >>= 16; // Keep the upper 16 bits of -(x/2)^2
A = A * A * 2; // A = (x/2)^4
t16 = (int16_t)(A >> 16);
B += -20480 * t16 * 2; // B = B - 0.625*A (20480 = 0.625 * 2^15)
// After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
A = x_half * t16 * 2; // A = (x/2)^5
t16 = (int16_t)(A >> 16);
B += 28672 * t16 * 2; // B = B + 0.875*A (28672 = 0.875 * 2^15)
// After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
t16 = (int16_t)(x2 >> 16);
A = x_half * t16 * 2; // A = (x/2)^3
B = B + (A >> 1); // B = B + 0.5*A
// After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
B = B + ((int32_t)32768); // Round off bit
return B;
}
// Fixed-point approximation of sqrt(abs(value)).
//
// The input is normalized, reduced to its upper 16 bits, passed through
// WebRtcSpl_SqrtLocal(), and the result is scaled back according to the
// normalization shift.
//
// - value [i] : Value to take the square root of. Negative values are negated
// first; WEBRTC_SPL_WORD32_MIN is mapped to
// WEBRTC_SPL_WORD32_MAX.
//
// returns : 0 when |value| is 0; otherwise the low 16 bits of the scaled
// series result, shifted right by half the normalization shift.
int32_t WebRtcSpl_Sqrt(int32_t value)
{
/*
Algorithm:
Six term Taylor Series is used here to compute the square root of a number
y^0.5 = (1+x)^0.5 where x = y-1
= 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
0.5 <= y < 1 (the input is normalized before the series is evaluated)
Example of how the algorithm works, with ut=sqrt(in), and
with in=73632 and ut=271 (even shift value case):
in=73632
y= in/131072
x=y-1
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
ut=t*(1/sqrt(2))*512
or:
in=73632
in2=73632*2^14
y= in2/2^31
x=y-1
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
ut=t*(1/sqrt(2))
ut2=ut*2^9
which gives:
in = 73632
in2 = 1206386688
y = 0.56176757812500
x = -0.43823242187500
t = 0.74973506527313
ut = 0.53014274874797
ut2 = 2.714330873589594e+002
or:
in=73632
in2=73632*2^14
y=in2/2
x=y-2^30
x_half=x/2^31
t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+ 0.875*((x_half)^5)
ut=t*(1/sqrt(2))
ut2=ut*2^9
which gives:
in = 73632
in2 = 1206386688
y = 603193344
x = -470548480
x_half = -0.21911621093750
t = 0.74973506527313
ut = 0.53014274874797
ut2 = 2.714330873589594e+002
*/
int16_t x_norm, nshift, t16, sh;
int32_t A;
int16_t k_sqrt_2 = 23170; // 1/sqrt2 in Q15 (==0x5a82)
A = value;
// The convention in this function is to calculate sqrt(abs(A)). Negate the
// input if it is negative.
if (A < 0) {
if (A == WEBRTC_SPL_WORD32_MIN) {
// This number cannot be held in an int32_t after negating.
// Map it to the maximum positive value.
A = WEBRTC_SPL_WORD32_MAX;
} else {
A = -A;
}
} else if (A == 0) {
return 0; // sqrt(0) = 0
}
sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
// Add the rounding bit unless that would exceed WEBRTC_SPL_WORD32_MAX, in
// which case clamp to the maximum instead.
if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
{
A = A + ((int32_t)32768); // Round off bit
} else
{
A = WEBRTC_SPL_WORD32_MAX;
}
x_norm = (int16_t)(A >> 16); // x_norm = AH
nshift = (sh / 2); // Half the normalization shift, applied to the result below.
RTC_DCHECK_GE(nshift, 0);
A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
if (2 * nshift == sh) {
// Even shift value case
t16 = (int16_t)(A >> 16); // t16 = AH
A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16
A = A + ((int32_t)32768); // Round off
A = A & ((int32_t)0x7fff0000); // Keep bits 16..30 only
A >>= 15; // Shift down by 15 bits
} else
{
A >>= 16; // A = A>>16
}
A = A & ((int32_t)0x0000ffff); // Keep the low 16 bits
A >>= nshift; // De-normalize the result.
return A;
}
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_VectorBitShiftW16()
* WebRtcSpl_VectorBitShiftW32()
* WebRtcSpl_VectorBitShiftW32ToW16()
* WebRtcSpl_ScaleVector()
* WebRtcSpl_ScaleVectorWithSat()
* WebRtcSpl_ScaleAndAddVectors()
* WebRtcSpl_ScaleAndAddVectorsWithRoundC()
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Shifts every element of a 16-bit vector.
//
// - res [o]          : Output vector, |length| elements.
// - length [i]       : Number of elements to process.
// - in [i]           : Input vector, |length| elements.
// - right_shifts [i] : Positive: shift right by this many bits. Zero or
//                      negative: multiply by 2^(-right_shifts).
void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
                                 const int16_t *in, int16_t right_shifts)
{
  size_t idx = 0;

  if (right_shifts <= 0) {
    // Scaling up is done as a multiplication by a power of two.
    for (; idx < length; ++idx) {
      res[idx] = in[idx] * (1 << (-right_shifts));
    }
    return;
  }

  for (; idx < length; ++idx) {
    res[idx] = in[idx] >> right_shifts;
  }
}
// Shifts every element of a 32-bit vector.
//
// - out_vector [o]    : Output vector, |vector_length| elements.
// - vector_length [i] : Number of elements to process.
// - in_vector [i]     : Input vector, |vector_length| elements.
// - right_shifts [i]  : Positive: shift right by this many bits. Zero or
//                       negative: shift left by -right_shifts bits.
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
                                 size_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
  size_t i;

  if (right_shifts > 0)
  {
    for (i = 0; i < vector_length; i++)
    {
      out_vector[i] = in_vector[i] >> right_shifts;
    }
  } else
  {
    const int left_shifts = -right_shifts;
    for (i = 0; i < vector_length; i++)
    {
      // Left-shifting a negative signed value is undefined behavior, so the
      // shift is done on the unsigned representation and converted back.
      out_vector[i] = (int32_t)((uint32_t)in_vector[i] << left_shifts);
    }
  }
}
// Shifts every element of a 32-bit vector and stores the result, passed
// through WebRtcSpl_SatW32ToW16(), in a 16-bit vector.
//
// - out [o]          : Output vector, |length| elements.
// - length [i]       : Number of elements to process.
// - in [i]           : Input vector, |length| elements.
// - right_shifts [i] : Zero or positive: shift right by this many bits.
//                      Negative: shift left by -right_shifts bits.
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
                                      const int32_t* in, int right_shifts) {
  size_t i;
  int32_t tmp_w32;

  if (right_shifts >= 0) {
    for (i = 0; i < length; i++) {
      tmp_w32 = in[i] >> right_shifts;
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  } else {
    const int left_shifts = -right_shifts;
    for (i = 0; i < length; i++) {
      // Left-shifting a negative signed value is undefined behavior, so the
      // shift is done on the unsigned representation and converted back.
      tmp_w32 = (int32_t)((uint32_t)in[i] << left_shifts);
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  }
}
// Computes out_vector[i] = (gain * in_vector[i]) >> right_shifts for every
// element, truncating each result to 16 bits without saturation.
//
// - in_vector [i]        : Input vector.
// - out_vector [o]       : Output vector.
// - gain [i]             : Scaling gain.
// - in_vector_length [i] : Number of elements to process.
// - right_shifts [i]     : Number of right bit shifts applied to the product.
void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, size_t in_vector_length,
                           int16_t right_shifts)
{
  size_t idx;

  for (idx = 0; idx < in_vector_length; ++idx) {
    const int product = in_vector[idx] * gain;
    out_vector[idx] = (int16_t)(product >> right_shifts);
  }
}
// Computes out_vector[i] = (gain * in_vector[i]) >> right_shifts for every
// element and passes each result through WebRtcSpl_SatW32ToW16() before it is
// stored.
//
// - in_vector [i]        : Input vector.
// - out_vector [o]       : Output vector.
// - gain [i]             : Scaling gain.
// - in_vector_length [i] : Number of elements to process.
// - right_shifts [i]     : Number of right bit shifts applied to the product.
void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, size_t in_vector_length,
                                  int16_t right_shifts)
{
  size_t idx;

  for (idx = 0; idx < in_vector_length; ++idx) {
    const int product = in_vector[idx] * gain;
    out_vector[idx] = WebRtcSpl_SatW32ToW16(product >> right_shifts);
  }
}
// Computes out[i] = ((gain1 * in1[i]) >> shift1) + ((gain2 * in2[i]) >> shift2)
// for every element. Each scaled term is truncated to 16 bits before the
// addition, and the sum is truncated to 16 bits when stored.
//
// - in1 [i]           : First input vector.
// - gain1 [i]         : Gain applied to |in1|.
// - shift1 [i]        : Right shift applied to the first product.
// - in2 [i]           : Second input vector.
// - gain2 [i]         : Gain applied to |in2|.
// - shift2 [i]        : Right shift applied to the second product.
// - out [o]           : Output vector.
// - vector_length [i] : Number of elements to process.
void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, size_t vector_length)
{
  size_t idx;

  for (idx = 0; idx < vector_length; ++idx) {
    const int16_t scaled1 = (int16_t)((gain1 * in1[idx]) >> shift1);
    const int16_t scaled2 = (int16_t)((gain2 * in2[idx]) >> shift2);
    out[idx] = scaled1 + scaled2;
  }
}
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
//
// Computes, for every element,
//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
//                    in_vector2[i] * in_vector2_scale +
//                    round_value) >> right_shifts
// where round_value is half of 2^right_shifts, and truncates the result to
// 16 bits.
//
// - in_vector1 [i]       : First input vector.
// - in_vector1_scale [i] : Scale applied to |in_vector1|.
// - in_vector2 [i]       : Second input vector.
// - in_vector2_scale [i] : Scale applied to |in_vector2|.
// - right_shifts [i]     : Number of right bit shifts; must not be negative.
// - out_vector [o]       : Output vector.
// - length [i]           : Number of elements to process; must not be 0.
//
// returns : 0 on success, -1 if a pointer is NULL, |length| is 0 or
//           |right_shifts| is negative (nothing is written in that case).
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           size_t length) {
  size_t i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length == 0 || right_shifts < 0) {
    return -1;
  }

  // Computed only after |right_shifts| has been validated: shifting by a
  // negative count is undefined behavior.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
        round_value) >> right_shifts);
  }
  return 0;
}
/*
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
* license.
*
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
* Date: Fri, Jun 24, 2011 at 3:20 AM
* Subject: Re: sqrt routine
* To: Kevin Ma <kma@google.com>
* Hi Kevin,
* Thanks for asking. Those routines are public domain (originally posted to
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
* Cheers,
* Wilco
*
* ----- Original Message -----
* From: "Kevin Ma" <kma@google.com>
* To: <Wilco.Dijkstra@ntlworld.com>
* Sent: Thursday, June 23, 2011 11:44 PM
* Subject: Fwd: sqrt routine
* Hi Wilco,
* I saw your sqrt routine from several web sites, including
* http://www.finesse.demon.co.uk/steven/sqrt.html.
* Just wonder if there's any copyright information with your Successive
* approximation routines, or if I can freely use it for any purpose.
* Thanks.
* Kevin
*/
// Minor modifications in code style for WebRTC, 2012.
#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
/*
* Algorithm:
* Successive approximation of the equation (root + delta) ^ 2 = N
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
* Use delta = 2^i for i = 15 .. 0.
*
* Output precision is 16 bits. Note for large input values (close to
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
* contains the MSB information (a non-sign value). Do with caution
* if you need to cast the output to int16_t type.
*
* If the input value is negative, it returns 0.
*/
// Integer square root by successive approximation, testing one result bit per
// step from bit 15 down to bit 0. |root| holds twice the root found so far;
// a bit is accepted when the remaining value is at least
// (root + 2^bit) << bit, which is then subtracted from the remainder.
// Negative inputs never satisfy that test, so they yield 0.
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
  int32_t remainder = value;
  int32_t root = 0;
  int bit;

  for (bit = 15; bit >= 0; --bit) {
    const int32_t trial = root + (1 << bit);
    if (remainder >= trial << bit) {
      remainder -= trial << bit;
      root |= 2 << bit;
    }
  }
  return root >> 1;
}
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
//
// WebRtcSpl_SqrtFloor(...)
//
// Returns the square root of the input value |value|. The precision of this
// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
// If |value| is a negative number then 0 is returned.
//
// Algorithm:
//
// An iterative 4 cycle/bit routine
//
// Input:
// - value : Value to calculate sqrt of
//
// Return value : Result of the sqrt calculation
//
int32_t WebRtcSpl_SqrtFloor(int32_t value);
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the VAD API calls. Specific function calls are
* given below.
*/
#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
#include <stddef.h>
#include <stdint.h>
typedef struct WebRtcVadInst VadInst;
#ifdef __cplusplus
extern "C" {
#endif
// Creates an instance to the VAD structure.
VadInst* WebRtcVad_Create(void);
// Frees the dynamic memory of a specified VAD instance.
//
// - handle [i] : Pointer to VAD instance that should be freed.
void WebRtcVad_Free(VadInst* handle);
// Initializes a VAD instance.
//
// - handle [i/o] : Instance that should be initialized.
//
// returns : 0 - (OK),
// -1 - (null pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst* handle);
// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
// restrictive in reporting speech. Put in other words the probability of being
// speech when the VAD returns 1 is increased with increasing mode. As a
// consequence also the missed detection rate goes up.
//
// - handle [i/o] : VAD instance.
// - mode [i] : Aggressiveness mode (0, 1, 2, or 3).
//
// returns : 0 - (OK),
// -1 - (null pointer, mode could not be set or the VAD instance
// has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);
// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// and frame lengths, see the description of
// WebRtcVad_ValidRateAndFrameLength().
//
// - handle [i/o] : VAD Instance. Needs to be initialized by
// WebRtcVad_Init() before call.
// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000
// - audio_frame [i] : Audio frame buffer.
// - frame_length [i] : Length of audio frame buffer in number of samples.
//
// returns : 1 - (Active Voice),
// 0 - (Non-active Voice),
// -1 - (Error)
int WebRtcVad_Process(VadInst* handle,
int fs,
const int16_t* audio_frame,
size_t frame_length);
// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns : 0 - (valid combination), -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/rtc_base/sanitizer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "webrtc/common_audio/vad/vad_gmm.h"
#include "webrtc/common_audio/vad/vad_sp.h"
// Spectrum Weighting
// Per-channel weights applied to the log likelihood ratios when they are
// summed for the global VAD decision in GmmProbability().
static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 };
// Step size used when updating the noise means (655 / 2^15 ~= 0.02).
static const int16_t kNoiseUpdateConst = 655; // Q15
// Step size used when updating the speech means (6554 / 2^15 ~= 0.2).
static const int16_t kSpeechUpdateConst = 6554; // Q15
// Factor used in the long term correction of the noise mean
// (154 / 2^8 ~= 0.6).
static const int16_t kBackEta = 154; // Q8
// Minimum difference between the two models, Q5
static const int16_t kMinimumDifference[kNumChannels] = {
544, 544, 576, 576, 576, 576 };
// Upper limit of mean value for speech model, Q7
static const int16_t kMaximumSpeech[kNumChannels] = {
11392, 11392, 11520, 11520, 11520, 11520 };
// Minimum value for mean value. Indexed by Gaussian; GmmProbability() uses it
// as the lower bound when updating the speech means.
static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 };
// Upper limit of mean value for noise model, Q7
static const int16_t kMaximumNoise[kNumChannels] = {
9216, 9088, 8960, 8832, 8704, 8576 };
// Start values for the Gaussian models, Q7
// The tables below hold kTableSize entries and are indexed as
// channel + gaussian * kNumChannels, i.e. the first six entries belong to the
// first Gaussian and the last six to the second.
// Weights for the two Gaussians for the six channels (noise)
// For every channel the two weights add up to 128 (1.0 in Q7).
static const int16_t kNoiseDataWeights[kTableSize] = {
34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
// Weights for the two Gaussians for the six channels (speech)
// For every channel the two weights add up to 128 (1.0 in Q7).
static const int16_t kSpeechDataWeights[kTableSize] = {
48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
// Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[kTableSize] = {
6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
// Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[kTableSize] = {
8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
};
// Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[kTableSize] = {
378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
// Stds for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataStds[kTableSize] = {
555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
// Constants used in GmmProbability().
//
// Maximum number of counted speech (VAD = 1) frames in a row.
static const int16_t kMaxSpeechFrames = 6;
// Minimum standard deviation for both speech and noise.
static const int16_t kMinStd = 384;
// Constants in WebRtcVad_InitCore().
// Default aggressiveness mode.
static const short kDefaultMode = 0;
// NOTE(review): presumably a marker value stored in the instance to flag it as
// initialized - its use is not visible in this part of the file.
static const int kInitCheck = 42;
// Constants used in WebRtcVad_set_mode_core().
//
// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms).
// Every array below has one entry per frame length, in that order.
//
// Mode 0, Quality.
static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 };
static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 };
static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 };
static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 };
// Mode 1, Low bitrate.
static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 };
static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 };
static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 };
static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 };
// Mode 2, Aggressive.
static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 };
static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 };
static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 };
static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 };
// Mode 3, Very aggressive.
static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 };
static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 };
static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 };
static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
// Adds |offset| to one entry per Gaussian in |data| and returns the sum of
// those updated entries multiplied by the matching |weights| entries. The
// entries visited are kNumChannels apart, starting at index 0, so the caller
// selects the channel by passing pointers that already point at it.
//
// - data [i/o]  : Data to average; the visited entries are updated in place.
// - offset [i]  : An offset added to |data| before averaging.
// - weights [i] : Weights used for averaging.
//
// returns       : The weighted sum of the updated entries.
static int32_t WeightedAverage(int16_t* data, int16_t offset,
                               const int16_t* weights) {
  int32_t sum = 0;
  int gaussian;

  for (gaussian = 0; gaussian < kNumGaussians; gaussian++) {
    const int idx = gaussian * kNumChannels;
    data[idx] = (int16_t)(data[idx] + offset);
    sum += data[idx] * weights[idx];
  }
  return sum;
}
// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still
// undefined behavior, so not a good idea; this just makes UBSan ignore the
// violation, so that our old code can continue to do what it's always been
// doing.)
//
// - a [i] : 16-bit factor.
// - b [i] : 32-bit factor.
//
// returns : a * b, with no overflow check.
static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) {
return a * b;
}
// Calculates the probabilities for both speech and background noise using
// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which
// type of signal is most probable.
//
// - self [i/o] : Pointer to VAD instance
// - features [i] : Feature vector of length |kNumChannels|
// = log10(energy in frequency band)
// - total_power [i] : Total power in audio frame.
// - frame_length [i] : Number of input samples
//
// - returns : the VAD decision (0 - noise, 1 - speech).
static int16_t GmmProbability(VadInstT* self, int16_t* features,
int16_t total_power, size_t frame_length) {
int channel, k;
int16_t feature_minimum;
int16_t h0, h1;
int16_t log_likelihood_ratio;
int16_t vadflag = 0;
int16_t shifts_h0, shifts_h1;
int16_t tmp_s16, tmp1_s16, tmp2_s16;
int16_t diff;
int gaussian;
int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
int16_t delt, ndelt;
int16_t maxspe, maxmu;
int16_t deltaN[kTableSize], deltaS[kTableSize];
int16_t ngprvec[kTableSize] = { 0 }; // Conditional probability = 0.
int16_t sgprvec[kTableSize] = { 0 }; // Conditional probability = 0.
int32_t h0_test, h1_test;
int32_t tmp1_s32, tmp2_s32;
int32_t sum_log_likelihood_ratios = 0;
int32_t noise_global_mean, speech_global_mean;
int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians];
int16_t overhead1, overhead2, individualTest, totalTest;
// Set various thresholds based on frame lengths (80, 160 or 240 samples).
if (frame_length == 80) {
overhead1 = self->over_hang_max_1[0];
overhead2 = self->over_hang_max_2[0];
individualTest = self->individual[0];
totalTest = self->total[0];
} else if (frame_length == 160) {
overhead1 = self->over_hang_max_1[1];
overhead2 = self->over_hang_max_2[1];
individualTest = self->individual[1];
totalTest = self->total[1];
} else {
overhead1 = self->over_hang_max_1[2];
overhead2 = self->over_hang_max_2[2];
individualTest = self->individual[2];
totalTest = self->total[2];
}
if (total_power > kMinEnergy) {
// The signal power of current frame is large enough for processing. The
// processing consists of two parts:
// 1) Calculating the likelihood of speech and thereby a VAD decision.
// 2) Updating the underlying model, w.r.t., the decision made.
// The detection scheme is an LRT with hypothesis
// H0: Noise
// H1: Speech
//
// We combine a global LRT with local tests, for each frequency sub-band,
// here defined as |channel|.
for (channel = 0; channel < kNumChannels; channel++) {
// For each channel we model the probability with a GMM consisting of
// |kNumGaussians|, with different means and standard deviations depending
// on H0 or H1.
h0_test = 0;
h1_test = 0;
for (k = 0; k < kNumGaussians; k++) {
gaussian = channel + k * kNumChannels;
// Probability under H0, that is, probability of frame being noise.
// Value given in Q27 = Q7 * Q20.
tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
self->noise_means[gaussian],
self->noise_stds[gaussian],
&deltaN[gaussian]);
noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32;
h0_test += noise_probability[k]; // Q27
// Probability under H1, that is, probability of frame being speech.
// Value given in Q27 = Q7 * Q20.
tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
self->speech_means[gaussian],
self->speech_stds[gaussian],
&deltaS[gaussian]);
speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32;
h1_test += speech_probability[k]; // Q27
}
// Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}).
// Approximation:
// log2(Pr{X|H1} / Pr{X|H0}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H0}*2^Q)
// = log2(h1_test) - log2(h0_test)
// = log2(2^(31-shifts_h1)*(1+b1))
// - log2(2^(31-shifts_h0)*(1+b0))
// = shifts_h0 - shifts_h1
// + log2(1+b1) - log2(1+b0)
// ~= shifts_h0 - shifts_h1
//
// Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1.
// Further, b0 and b1 are independent and on the average the two terms
// cancel.
shifts_h0 = WebRtcSpl_NormW32(h0_test);
shifts_h1 = WebRtcSpl_NormW32(h1_test);
if (h0_test == 0) {
shifts_h0 = 31;
}
if (h1_test == 0) {
shifts_h1 = 31;
}
log_likelihood_ratio = shifts_h0 - shifts_h1;
// Update |sum_log_likelihood_ratios| with spectrum weighting. This is
// used for the global VAD decision.
sum_log_likelihood_ratios +=
(int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
// Local VAD decision.
if ((log_likelihood_ratio * 4) > individualTest) {
vadflag = 1;
}
// TODO(bjornv): The conditional probabilities below are applied on the
// hard coded number of Gaussians set to two. Find a way to generalize.
// Calculate local noise probabilities used later when updating the GMM.
h0 = (int16_t) (h0_test >> 12); // Q15
if (h0 > 0) {
// High probability of noise. Assign conditional probabilities for each
// Gaussian in the GMM.
tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29
ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14
ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel];
} else {
// Low noise probability. Assign conditional probability 1 to the first
// Gaussian and 0 to the rest (which is already set at initialization).
ngprvec[channel] = 16384;
}
// Calculate local speech probabilities used later when updating the GMM.
h1 = (int16_t) (h1_test >> 12); // Q15
if (h1 > 0) {
// High probability of speech. Assign conditional probabilities for each
// Gaussian in the GMM. Otherwise use the initialized values, i.e., 0.
tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29
sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14
sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel];
}
}
// Make a global VAD decision.
vadflag |= (sum_log_likelihood_ratios >= totalTest);
// Update the model parameters.
maxspe = 12800;
for (channel = 0; channel < kNumChannels; channel++) {
// Get minimum value in past which is used for long term correction in Q4.
feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel);
// Compute the "global" mean, that is the sum of the two means weighted.
noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
&kNoiseDataWeights[channel]);
tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8
for (k = 0; k < kNumGaussians; k++) {
gaussian = channel + k * kNumChannels;
nmk = self->noise_means[gaussian];
smk = self->speech_means[gaussian];
nsk = self->noise_stds[gaussian];
ssk = self->speech_stds[gaussian];
// Update noise mean vector if the frame consists of noise only.
nmk2 = nmk;
if (!vadflag) {
// deltaN = (x-mu)/sigma^2
// ngprvec[k] = |noise_probability[k]| /
// (|noise_probability[0]| + |noise_probability[1]|)
// (Q14 * Q11 >> 11) = Q14.
delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11);
// Q7 + (Q14 * Q15 >> 22) = Q7.
nmk2 = nmk + (int16_t)((delt * kNoiseUpdateConst) >> 22);
}
// Long term correction of the noise mean.
// Q8 - Q8 = Q8.
ndelt = (feature_minimum << 4) - tmp1_s16;
// Q7 + (Q8 * Q8) >> 9 = Q7.
nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9);
// Control that the noise mean does not drift too much.
tmp_s16 = (int16_t) ((k + 5) << 7);
if (nmk3 < tmp_s16) {
nmk3 = tmp_s16;
}
tmp_s16 = (int16_t) ((72 + k - channel) << 7);
if (nmk3 > tmp_s16) {
nmk3 = tmp_s16;
}
self->noise_means[gaussian] = nmk3;
if (vadflag) {
// Update speech mean vector:
// |deltaS| = (x-mu)/sigma^2
// sgprvec[k] = |speech_probability[k]| /
// (|speech_probability[0]| + |speech_probability[1]|)
// (Q14 * Q11) >> 11 = Q14.
delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11);
// Q14 * Q15 >> 21 = Q8.
tmp_s16 = (int16_t)((delt * kSpeechUpdateConst) >> 21);
// Q7 + (Q8 >> 1) = Q7. With rounding.
smk2 = smk + ((tmp_s16 + 1) >> 1);
// Control that the speech mean does not drift too much.
maxmu = maxspe + 640;
if (smk2 < kMinimumMean[k]) {
smk2 = kMinimumMean[k];
}
if (smk2 > maxmu) {
smk2 = maxmu;
}
self->speech_means[gaussian] = smk2; // Q7.
// (Q7 >> 3) = Q4. With rounding.
tmp_s16 = ((smk + 4) >> 3);
tmp_s16 = features[channel] - tmp_s16; // Q4
// (Q11 * Q4 >> 3) = Q12.
tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3;
tmp2_s32 = tmp1_s32 - 4096;
tmp_s16 = sgprvec[gaussian] >> 2;
// (Q14 >> 2) * Q12 = Q24.
tmp1_s32 = tmp_s16 * tmp2_s32;
tmp2_s32 = tmp1_s32 >> 4; // Q20
// 0.1 * Q20 / Q7 = Q13.
if (tmp2_s32 > 0) {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10);
} else {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10);
tmp_s16 = -tmp_s16;
}
// Divide by 4 giving an update factor of 0.025 (= 0.1 / 4).
// Note that division by 4 equals shift by 2, hence,
// (Q13 >> 8) = (Q13 >> 6) / 4 = Q7.
tmp_s16 += 128; // Rounding.
ssk += (tmp_s16 >> 8);
if (ssk < kMinStd) {
ssk = kMinStd;
}
self->speech_stds[gaussian] = ssk;
} else {
// Update GMM variance vectors.
// deltaN * (features[channel] - nmk) - 1
// Q4 - (Q7 >> 3) = Q4.
tmp_s16 = features[channel] - (nmk >> 3);
// (Q11 * Q4 >> 3) = Q12.
tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3;
tmp1_s32 -= 4096;
// (Q14 >> 2) * Q12 = Q24.
tmp_s16 = (ngprvec[gaussian] + 2) >> 2;
tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32);
// Q20 * approx 0.001 (2^-10=0.0009766), hence,
// (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
tmp1_s32 = tmp2_s32 >> 14;
// Q20 / Q7 = Q13.
if (tmp1_s32 > 0) {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk);
} else {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk);
tmp_s16 = -tmp_s16;
}
tmp_s16 += 32; // Rounding
nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7.
if (nsk < kMinStd) {
nsk = kMinStd;
}
self->noise_stds[gaussian] = nsk;
}
}
// Separate models if they are too close.
// |noise_global_mean| in Q14 (= Q7 * Q7).
noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
&kNoiseDataWeights[channel]);
// |speech_global_mean| in Q14 (= Q7 * Q7).
speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
&kSpeechDataWeights[channel]);
// |diff| = "global" speech mean - "global" noise mean.
// (Q14 >> 9) - (Q14 >> 9) = Q5.
diff = (int16_t) (speech_global_mean >> 9) -
(int16_t) (noise_global_mean >> 9);
if (diff < kMinimumDifference[channel]) {
tmp_s16 = kMinimumDifference[channel] - diff;
// |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
// |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2);
tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2);
// Move Gaussian means for speech model by |tmp1_s16| and update
// |speech_global_mean|. Note that |self->speech_means[channel]| is
// changed after the call.
speech_global_mean = WeightedAverage(&self->speech_means[channel],
tmp1_s16,
&kSpeechDataWeights[channel]);
// Move Gaussian means for noise model by -|tmp2_s16| and update
// |noise_global_mean|. Note that |self->noise_means[channel]| is
// changed after the call.
noise_global_mean = WeightedAverage(&self->noise_means[channel],
-tmp2_s16,
&kNoiseDataWeights[channel]);
}
// Control that the speech & noise means do not drift too much.
maxspe = kMaximumSpeech[channel];
tmp2_s16 = (int16_t) (speech_global_mean >> 7);
if (tmp2_s16 > maxspe) {
// Upper limit of speech model.
tmp2_s16 -= maxspe;
for (k = 0; k < kNumGaussians; k++) {
self->speech_means[channel + k * kNumChannels] -= tmp2_s16;
}
}
tmp2_s16 = (int16_t) (noise_global_mean >> 7);
if (tmp2_s16 > kMaximumNoise[channel]) {
tmp2_s16 -= kMaximumNoise[channel];
for (k = 0; k < kNumGaussians; k++) {
self->noise_means[channel + k * kNumChannels] -= tmp2_s16;
}
}
}
self->frame_counter++;
}
// Smooth with respect to transition hysteresis.
if (!vadflag) {
if (self->over_hang > 0) {
vadflag = 2 + self->over_hang;
self->over_hang--;
}
self->num_of_speech = 0;
} else {
self->num_of_speech++;
if (self->num_of_speech > kMaxSpeechFrames) {
self->num_of_speech = kMaxSpeechFrames;
self->over_hang = overhead2;
} else {
self->over_hang = overhead1;
}
}
return vadflag;
}
// Puts |self| into its start-up state and selects the default aggressiveness
// mode (|kDefaultMode|).
//
// Returns 0 on success. Returns -1 if |self| is NULL or if the default mode is
// rejected by WebRtcVad_set_mode_core(); in both failure cases |init_flag| is
// not written.
int WebRtcVad_InitCore(VadInstT* self) {
  int n;

  if (self == NULL) {
    return -1;
  }

  // General bookkeeping.
  self->num_of_speech = 0;
  self->over_hang = 0;
  self->frame_counter = 0;
  self->vad = 1;  // Speech active (=1).

  // Clear the high pass, splitting and downsampling filter memories.
  memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
  memset(self->lower_state, 0, sizeof(self->lower_state));
  memset(self->upper_state, 0, sizeof(self->upper_state));
  memset(self->downsampling_filter_states, 0,
         sizeof(self->downsampling_filter_states));

  // Reset the 48 kHz -> 8 kHz resampler.
  WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8);

  // Load the initial PDF parameters of the noise and speech models.
  for (n = 0; n < kTableSize; n++) {
    self->speech_stds[n] = kSpeechDataStds[n];
    self->noise_stds[n] = kNoiseDataStds[n];
    self->speech_means[n] = kSpeechDataMeans[n];
    self->noise_means[n] = kNoiseDataMeans[n];
  }

  // Index and minimum value vectors: 16 entries per channel.
  for (n = 0; n < 16 * kNumChannels; n++) {
    self->index_vector[n] = 0;
    self->low_value_vector[n] = 10000;
  }

  // Mean value memory, for WebRtcVad_FindMinimum().
  for (n = 0; n < kNumChannels; n++) {
    self->mean_value[n] = 1600;
  }

  // The instance only counts as initialized once the default mode is in place.
  if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
    return -1;
  }
  self->init_flag = kInitCheck;

  return 0;
}
// Sets the aggressiveness mode by copying the overhang and threshold tables of
// the requested |mode| into |self|.
//
// - self [i/o] : VAD instance whose mode tables are overwritten.
// - mode [i]   : 0 (quality), 1 (low bitrate), 2 (aggressive) or
//                3 (very aggressive).
//
// Returns 0 on success. Any other |mode| returns -1 and leaves |self|
// untouched.
int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
  // Source tables chosen by |mode|; copied into |self| in one place below.
  const void* over_hang_1;
  const void* over_hang_2;
  const void* local_threshold;
  const void* global_threshold;

  switch (mode) {
    case 0:
      // Quality mode.
      over_hang_1 = kOverHangMax1Q;
      over_hang_2 = kOverHangMax2Q;
      local_threshold = kLocalThresholdQ;
      global_threshold = kGlobalThresholdQ;
      break;
    case 1:
      // Low bitrate mode.
      over_hang_1 = kOverHangMax1LBR;
      over_hang_2 = kOverHangMax2LBR;
      local_threshold = kLocalThresholdLBR;
      global_threshold = kGlobalThresholdLBR;
      break;
    case 2:
      // Aggressive mode.
      over_hang_1 = kOverHangMax1AGG;
      over_hang_2 = kOverHangMax2AGG;
      local_threshold = kLocalThresholdAGG;
      global_threshold = kGlobalThresholdAGG;
      break;
    case 3:
      // Very aggressive mode.
      over_hang_1 = kOverHangMax1VAG;
      over_hang_2 = kOverHangMax2VAG;
      local_threshold = kLocalThresholdVAG;
      global_threshold = kGlobalThresholdVAG;
      break;
    default:
      // Unknown mode: report the error without modifying the instance.
      return -1;
  }

  memcpy(self->over_hang_max_1, over_hang_1, sizeof(self->over_hang_max_1));
  memcpy(self->over_hang_max_2, over_hang_2, sizeof(self->over_hang_max_2));
  memcpy(self->individual, local_threshold, sizeof(self->individual));
  memcpy(self->total, global_threshold, sizeof(self->total));

  return 0;
}
// Calculates the VAD decision for a 48 kHz frame: the input is resampled to
// 8 kHz in 10 ms pieces (480 input samples -> 80 output samples each) and the
// resulting narrowband frame is handed to WebRtcVad_CalcVad8khz().
//
// - inst         [i/o] : VAD instance; the resampler state and everything
//                        WebRtcVad_CalcVad8khz() touches are updated.
// - speech_frame [i]   : Input audio sampled at 48 kHz.
// - frame_length [i]   : Number of samples in |speech_frame|. The local 8 kHz
//                        buffer holds 240 samples, so this must not exceed
//                        1440 (30 ms).
//
// Returns the value of WebRtcVad_CalcVad8khz().
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  int vad;
  size_t i;
  int16_t speech_nb[240];  // 30 ms in 8 kHz.
  // |tmp_mem| is a temporary memory used by resample function, length is
  // frame length in 10 ms (480 samples) + 256 extra.
  int32_t tmp_mem[480 + 256] = { 0 };
  const size_t kFrameLen10ms48khz = 480;
  const size_t kFrameLen10ms8khz = 80;
  size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;

  for (i = 0; i < num_10ms_frames; i++) {
    // Step through the input in 10 ms pieces as well as the output. Passing
    // the unmodified |speech_frame| here would resample the first 10 ms over
    // and over and never look at the rest of a 20 or 30 ms frame.
    WebRtcSpl_Resample48khzTo8khz(&speech_frame[i * kFrameLen10ms48khz],
                                  &speech_nb[i * kFrameLen10ms8khz],
                                  &inst->state_48_to_8,
                                  tmp_mem);
  }

  // Do VAD on an 8 kHz signal.
  vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);

  return vad;
}
// Calculates the VAD decision for a 32 kHz frame by halving the sample rate
// twice (32 -> 16 -> 8 kHz) and running WebRtcVad_CalcVad8khz() on the result.
//
// - inst         [i/o] : VAD instance; downsampling filter states are updated.
// - speech_frame [i]   : Input audio sampled at 32 kHz.
// - frame_length [i]   : Number of samples in |speech_frame|.
//
// Returns the value of WebRtcVad_CalcVad8khz().
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  int16_t wideband[480];    // 16 kHz signal; 480 samples is 30 ms.
  int16_t narrowband[240];  // 8 kHz signal; 240 samples is 30 ms.
  const size_t wideband_length = frame_length / 2;
  const size_t narrowband_length = wideband_length / 2;

  // First stage (32 -> 16 kHz) uses the filter states starting at index 2.
  WebRtcVad_Downsampling(speech_frame, wideband,
                         &(inst->downsampling_filter_states[2]), frame_length);

  // Second stage (16 -> 8 kHz) uses the filter states starting at index 0.
  WebRtcVad_Downsampling(wideband, narrowband,
                         inst->downsampling_filter_states, wideband_length);

  // Do VAD on an 8 kHz signal.
  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
}
// Calculates the VAD decision for a 16 kHz frame by downsampling it to 8 kHz
// and running WebRtcVad_CalcVad8khz() on the result.
//
// - inst         [i/o] : VAD instance; downsampling filter states are updated.
// - speech_frame [i]   : Input audio sampled at 16 kHz.
// - frame_length [i]   : Number of samples in |speech_frame|.
//
// Returns the value of WebRtcVad_CalcVad8khz().
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  int16_t narrowband[240];  // 8 kHz signal; 240 samples is 30 ms.
  const size_t narrowband_length = frame_length / 2;

  // Wideband: downsample the signal before doing VAD.
  WebRtcVad_Downsampling(speech_frame, narrowband,
                         inst->downsampling_filter_states, frame_length);

  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
}
// Calculates the VAD decision for an 8 kHz frame: extracts the per-band
// features and the total power, then evaluates them with GmmProbability().
// The decision is stored in |inst->vad| and also returned.
//
// - inst         [i/o] : VAD instance.
// - speech_frame [i]   : Input audio sampled at 8 kHz.
// - frame_length [i]   : Number of samples in |speech_frame|.
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
                          size_t frame_length) {
  int16_t band_features[kNumChannels];

  // Get power in the bands.
  const int16_t frame_power = WebRtcVad_CalculateFeatures(
      inst, speech_frame, frame_length, band_features);

  // Make a VAD.
  inst->vad = GmmProbability(inst, band_features, frame_power, frame_length);

  return inst->vad;
}
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the descriptions of the core VAD calls.
*/
#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
#define COMMON_AUDIO_VAD_VAD_CORE_H_
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Dimensions and limits shared by the VAD core and the filter bank.
enum {
  // Number of frequency bands (named channels).
  kNumChannels = 6,
  // Number of Gaussians per channel in the GMM.
  kNumGaussians = 2,
  // Number of entries in each per-channel, per-Gaussian parameter table.
  kTableSize = kNumChannels * kNumGaussians,
  // Minimum energy required to trigger audio signal.
  kMinEnergy = 10
};
// Complete state of one VAD instance. WebRtcVad_InitCore() resets every field;
// the WebRtcVad_CalcVad*() functions read and update it for each frame.
typedef struct VadInstT_ {
// Most recent VAD decision. Set to 1 (speech active) by WebRtcVad_InitCore()
// and overwritten by WebRtcVad_CalcVad8khz() for every frame.
int vad;
// Filter memory for WebRtcVad_Downsampling(). The 16 -> 8 kHz stage is given
// the address of element 0 and the 32 -> 16 kHz stage the address of element 2.
int32_t downsampling_filter_states[4];
// Resampler state used by WebRtcVad_CalcVad48khz().
WebRtcSpl_State48khzTo8khz state_48_to_8;
// GMM parameters, indexed as [channel + gaussian * kNumChannels]. Means and
// standard deviations are kept in Q7.
int16_t noise_means[kTableSize];
int16_t speech_means[kTableSize];
int16_t noise_stds[kTableSize];
int16_t speech_stds[kTableSize];
// Frame counter; cleared by WebRtcVad_InitCore().
// TODO(bjornv): Change to |frame_count|.
int32_t frame_counter;
// Remaining hangover frames: while it is positive, frames without detected
// speech are still reported as active and the counter is decremented.
int16_t over_hang; // Over Hang
// Number of consecutive frames detected as speech; reset to 0 by a frame
// without speech.
int16_t num_of_speech;
// Index vector, 16 entries per channel, initialized to 0.
// NOTE(review): presumably the age of each entry in |low_value_vector| (see
// the TODO below) - confirm against WebRtcVad_FindMinimum().
// TODO(bjornv): Change to |age_vector|.
int16_t index_vector[16 * kNumChannels];
// Minimum value vector, 16 entries per channel, initialized to 10000.
int16_t low_value_vector[16 * kNumChannels];
// Mean value memory for WebRtcVad_FindMinimum(), one entry per channel,
// initialized to 1600.
// TODO(bjornv): Change to |median|.
int16_t mean_value[kNumChannels];
// Splitting filter states; element |frequency_band| (0-4) belongs to the
// corresponding SplitFilter() call in WebRtcVad_CalculateFeatures().
int16_t upper_state[5];
int16_t lower_state[5];
// State of the high pass filter applied to the lowest band: elements 0-1 hold
// the two previous inputs, elements 2-3 the two previous outputs.
int16_t hp_filter_state[4];
// Mode dependent tables, filled in by WebRtcVad_set_mode_core().
// NOTE(review): the three entries presumably correspond to 10, 20 and 30 ms
// frames - confirm against GmmProbability().
int16_t over_hang_max_1[3];
int16_t over_hang_max_2[3];
int16_t individual[3];
int16_t total[3];
// Set to |kInitCheck| once WebRtcVad_InitCore() has completed successfully.
int init_flag;
} VadInstT;
// Initializes the core VAD component: all state in |self| is reset and the
// default aggressiveness mode, controlled by |kDefaultMode| in vad_core.c, is
// selected.
//
// - self [i/o] : Instance that should be initialized
//
// returns : 0 (OK), -1 (|self| is a null pointer or the default mode can't
// be set)
int WebRtcVad_InitCore(VadInstT* self);
/****************************************************************************
* WebRtcVad_set_mode_core(...)
*
* This function changes the VAD settings by loading the overhang and
* threshold tables that belong to the requested mode.
*
* Input:
* - self : VAD instance
* - mode : Aggressiveness degree
* 0 (High quality) - 3 (Highly aggressive)
*
* Output:
* - self : Changed instance
*
* Return value : 0 - Ok
* -1 - Error (|mode| outside 0 - 3; the instance is left
* unchanged)
*/
int WebRtcVad_set_mode_core(VadInstT* self, int mode);
/****************************************************************************
* WebRtcVad_CalcVad48khz(...)
* WebRtcVad_CalcVad32khz(...)
* WebRtcVad_CalcVad16khz(...)
* WebRtcVad_CalcVad8khz(...)
*
* Calculate probability for active speech and make VAD decision. The 16, 32
* and 48 kHz variants first convert the frame to 8 kHz and then call
* WebRtcVad_CalcVad8khz().
*
* Input:
* - inst : VAD instance
* - speech_frame : Input speech frame, sampled at the rate given by the
* function name
* - frame_length : Number of input samples
*
* Output:
* - inst : Updated filter states etc.
*
* Return value : VAD decision
* 0 - No active speech
* 1-6 - Active speech
*/
int WebRtcVad_CalcVad48khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad32khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad16khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad8khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
#endif // COMMON_AUDIO_VAD_VAD_CORE_H_
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Constants used in LogOfEnergy().
// |kLogConst| converts a base-2 logarithm into 10 * log10() in Q4.
static const int16_t kLogConst = 24660; // 160*log10(2) in Q9.
// Integer part of log2() of an energy normalized to 15 bits (leading bit 2^14).
static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10
// Coefficients used by HighPassFilter, Q14.
// Only elements 1 and 2 of |kHpPoleCoefs| are read by HighPassFilter();
// element 0 is 1.0 in Q14.
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };
// Allpass filter coefficients, upper and lower, in Q15.
// Upper: 0.64, Lower: 0.17
static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
// Adjustment for division with two in SplitFilter.
// Element k is the offset added to features[k] in
// WebRtcVad_CalculateFeatures().
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
// Second order high pass filter with a cut-off frequency at 80 Hz, given that
// |data_in| is sampled at 500 Hz.
//
// - data_in      [i]   : Input audio data sampled at 500 Hz.
// - data_length  [i]   : Length of input and output data.
// - filter_state [i/o] : State of the filter: elements 0-1 are the two
//                        previous inputs, elements 2-3 the two previous
//                        outputs.
// - data_out     [o]   : Output audio data in the frequency interval
//                        80 - 250 Hz.
//
// Maximum amplification of a single sample (sum of the absolute values of the
// impulse response):
//   zero/pole filter : 1.4546 (0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194)
//   all-zero section : 1.6189 (0.4047 -0.8094 0.4047 0 0 0)
//   all-pole section : 1.9931 (1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532)
static void HighPassFilter(const int16_t* data_in, size_t data_length,
                           int16_t* filter_state, int16_t* data_out) {
  size_t n;

  for (n = 0; n < data_length; n++) {
    const int16_t sample = data_in[n];

    // All-zero section (filter coefficients in Q14).
    int32_t acc = kHpZeroCoefs[0] * sample;
    acc += kHpZeroCoefs[1] * filter_state[0];
    acc += kHpZeroCoefs[2] * filter_state[1];
    filter_state[1] = filter_state[0];
    filter_state[0] = sample;

    // All-pole section (filter coefficients in Q14).
    acc -= kHpPoleCoefs[1] * filter_state[2];
    acc -= kHpPoleCoefs[2] * filter_state[3];
    filter_state[3] = filter_state[2];
    filter_state[2] = (int16_t) (acc >> 14);

    data_out[n] = filter_state[2];
  }
}
// First order all pass filter applied to every second sample of |data_in|.
// It is used before splitting the signal into two frequency bands (low pass vs
// high pass).
// Note that |data_in| and |data_out| can NOT correspond to the same address.
//
// - data_in            [i]   : Input audio signal given in Q0. Samples
//                              0, 2, 4, ... are read.
// - data_length        [i]   : Number of output samples, i.e. the number of
//                              input samples that are read.
// - filter_coefficient [i]   : Given in Q15.
// - filter_state       [i/o] : State of the filter given in Q(-1).
// - data_out           [o]   : Output audio signal given in Q(-1).
//
// The filter can only cause overflow (in the 16 bit output) if more than 4
// consecutive input numbers are of maximum value and have the same sign as the
// first taps of the impulse response. First 6 taps of the impulse response:
// 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990
static void AllPassFilter(const int16_t* data_in, size_t data_length,
                          int16_t filter_coefficient, int16_t* filter_state,
                          int16_t* data_out) {
  int32_t state_q15 = ((int32_t) (*filter_state)) * (1 << 16);  // Q15
  size_t n;

  for (n = 0; n < data_length; n++) {
    const int16_t x = data_in[2 * n];
    const int32_t sum = state_q15 + filter_coefficient * x;
    const int16_t y = (int16_t) (sum >> 16);  // Q(-1)

    data_out[n] = y;

    state_q15 = (x * (1 << 14)) - filter_coefficient * y;  // Q14
    state_q15 *= 2;  // Q15.
  }

  *filter_state = (int16_t) (state_q15 >> 16);  // Q(-1)
}
// Splits |data_in| into an upper (high pass) half band and a lower (low pass)
// half band, each downsampled by two.
//
// - data_in     [i]   : Input audio data to be split into two frequency bands.
// - data_length [i]   : Length of |data_in|.
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
// - hp_data_out [o]   : Output audio data of the upper half of the spectrum.
//                       The length is |data_length| / 2.
// - lp_data_out [o]   : Output audio data of the lower half of the spectrum.
//                       The length is |data_length| / 2.
static void SplitFilter(const int16_t* data_in, size_t data_length,
                        int16_t* upper_state, int16_t* lower_state,
                        int16_t* hp_data_out, int16_t* lp_data_out) {
  const size_t out_length = data_length >> 1;  // Downsampling by 2.
  size_t n;

  // Upper branch: all pass filter the even samples.
  AllPassFilter(&data_in[0], out_length, kAllPassCoefsQ15[0], upper_state,
                hp_data_out);

  // Lower branch: all pass filter the odd samples.
  AllPassFilter(&data_in[1], out_length, kAllPassCoefsQ15[1], lower_state,
                lp_data_out);

  // Make LP and HP signals: HP is the difference of the two branches, LP is
  // their sum.
  for (n = 0; n < out_length; n++) {
    const int16_t upper_branch = hp_data_out[n];
    hp_data_out[n] -= lp_data_out[n];
    lp_data_out[n] += upper_branch;
  }
}
// Calculates the energy of |data_in| in dB, and also updates an overall
// |total_energy| if necessary.
//
// - data_in      [i]   : Input audio data for energy calculation.
// - data_length  [i]   : Length of input data.
// - offset       [i]   : Offset value added to |log_energy|.
// - total_energy [i/o] : An external energy updated with the energy of
//                        |data_in|.
//                        NOTE: |total_energy| is only updated if
//                        |total_energy| <= |kMinEnergy|, and never when the
//                        energy of |data_in| is zero.
// - log_energy   [o]   : 10 * log10("energy of |data_in|") given in Q4, plus
//                        |offset|. Equal to |offset| when the energy is zero.
static void LogOfEnergy(const int16_t* data_in, size_t data_length,
                        int16_t offset, int16_t* total_energy,
                        int16_t* log_energy) {
  // |tot_rshifts| accumulates the number of right shifts performed on |energy|.
  int tot_rshifts = 0;
  int normalizing_rshifts;
  int16_t log2_energy;
  int16_t energy_db;
  // Unsigned, because the fractional part is masked out further down.
  uint32_t energy;

  RTC_DCHECK(data_in);
  RTC_DCHECK_GT(data_length, 0);

  energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
                                       &tot_rshifts);

  // No energy at all: report just the offset and leave |total_energy| alone.
  if (energy == 0) {
    *log_energy = offset;
    return;
  }

  // Normalize |energy| to 15 bits, which is the same as 17 leading zeros in an
  // unsigned 32 bit value. Afterwards |energy| is in Q(-tot_rshifts).
  normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
  tot_rshifts += normalizing_rshifts;
  if (normalizing_rshifts < 0) {
    energy <<= -normalizing_rshifts;
  } else {
    energy >>= normalizing_rshifts;
  }

  // log2(|energy|) in Q10. With |energy| = 2^14 + frac_Q15, where
  // frac_Q15 = (|energy| & 0x00003FFF):
  //   2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) ~=
  //   (14 << 10) + 2^10 * (frac_Q15 * 2^-14) = (14 << 10) + (frac_Q15 >> 4)
  log2_energy = kLogEnergyIntPart;
  log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);

  // 10 * log10("true energy") in Q4 =
  //   160 * log10(|energy| * 2^|tot_rshifts|) =
  //   (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
  //   |kLogConst| * (|log2_energy| + |tot_rshifts|)
  // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
  energy_db = (int16_t)(((kLogConst * log2_energy) >> 19) +
                        ((tot_rshifts * kLogConst) >> 9));
  if (energy_db < 0) {
    energy_db = 0;
  }
  *log_energy = (int16_t) (energy_db + offset);

  // Update the approximate |total_energy| with the energy of |data_in|, if
  // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
  // energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
  if (*total_energy <= kMinEnergy) {
    if (tot_rshifts >= 0) {
      // We know by construction that the |energy| > |kMinEnergy| in Q0, so add
      // an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
      *total_energy += kMinEnergy + 1;
    } else {
      // |energy| is represented by 15 bits, so any right shifted |energy| fits
      // in an int16_t. Adding it to |total_energy| is wrap around safe as long
      // as |kMinEnergy| < 8192.
      *total_energy += (int16_t) (energy >> -tot_rshifts);  // Q0.
    }
  }
}
// Splits an 8 kHz frame into six frequency bands and computes the log energy
// of each band.
//
// - self        [i/o] : VAD instance; splitting and high pass filter states
//                       are updated.
// - data_in     [i]   : Input audio data, [0 - 4000] Hz.
// - data_length [i]   : Length of |data_in|; expected to be 80, 160 or 240
//                       samples (10, 20 or 30 ms in 8 kHz).
// - features    [o]   : Log energy per band, |kNumChannels| values:
//                       [0] 80 - 250 Hz,    [1] 250 - 500 Hz,
//                       [2] 500 - 1000 Hz,  [3] 1000 - 2000 Hz,
//                       [4] 2000 - 3000 Hz, [5] 3000 - 4000 Hz.
//
// Returns the approximate total energy accumulated by LogOfEnergy().
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
                                    size_t data_length, int16_t* features) {
  // With at most 240 input samples the intermediate signals have at most 120
  // samples after the first split and at most 60 samples after the second.
  int16_t hp_120[120], lp_120[120];
  int16_t hp_60[60], lp_60[60];
  // Signal lengths after one, two, three and four splits.
  const size_t len_div2 = data_length >> 1;   // Bandwidth = 2000 Hz.
  const size_t len_div4 = len_div2 >> 1;      // Bandwidth = 1000 Hz.
  const size_t len_div8 = len_div4 >> 1;      // Bandwidth = 500 Hz.
  const size_t len_div16 = len_div8 >> 1;     // Bandwidth = 250 Hz.
  int16_t total_energy = 0;

  RTC_DCHECK_LE(data_length, 240);
  RTC_DCHECK_LT(4, kNumChannels - 1);  // Checking maximum |frequency_band|.

  // Band 0: split [0 - 4000] Hz at 2000 Hz and downsample.
  SplitFilter(data_in, data_length, &self->upper_state[0],
              &self->lower_state[0], hp_120, lp_120);

  // Band 1: split the upper band [2000 - 4000] Hz at 3000 Hz and downsample.
  SplitFilter(hp_120, len_div2, &self->upper_state[1], &self->lower_state[1],
              hp_60, lp_60);
  // Energy in 3000 Hz - 4000 Hz.
  LogOfEnergy(hp_60, len_div4, kOffsetVector[5], &total_energy, &features[5]);
  // Energy in 2000 Hz - 3000 Hz.
  LogOfEnergy(lp_60, len_div4, kOffsetVector[4], &total_energy, &features[4]);

  // Band 2: split the lower band [0 - 2000] Hz at 1000 Hz and downsample.
  SplitFilter(lp_120, len_div2, &self->upper_state[2], &self->lower_state[2],
              hp_60, lp_60);
  // Energy in 1000 Hz - 2000 Hz.
  LogOfEnergy(hp_60, len_div4, kOffsetVector[3], &total_energy, &features[3]);

  // Band 3: split [0 - 1000] Hz at 500 Hz and downsample.
  SplitFilter(lp_60, len_div4, &self->upper_state[3], &self->lower_state[3],
              hp_120, lp_120);
  // Energy in 500 Hz - 1000 Hz.
  LogOfEnergy(hp_120, len_div8, kOffsetVector[2], &total_energy, &features[2]);

  // Band 4: split [0 - 500] Hz at 250 Hz and downsample.
  SplitFilter(lp_120, len_div8, &self->upper_state[4], &self->lower_state[4],
              hp_60, lp_60);
  // Energy in 250 Hz - 500 Hz.
  LogOfEnergy(hp_60, len_div16, kOffsetVector[1], &total_energy, &features[1]);

  // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
  HighPassFilter(lp_60, len_div16, self->hp_filter_state, hp_120);
  // Energy in 80 Hz - 250 Hz.
  LogOfEnergy(hp_120, len_div16, kOffsetVector[0], &total_energy,
              &features[0]);

  return total_energy;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment