Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
RapidASR
Commits
83ff3a7f
Commit
83ff3a7f
authored
Feb 25, 2023
by
mayong
Browse files
Add cpp_onnxruntime
parent
5f46ad1c
Changes
77
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
5181 additions
and
0 deletions
+5181
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/energy.c
...hird_party/webrtc/common_audio/signal_processing/energy.c
+39
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/get_scaling_square.c
...ebrtc/common_audio/signal_processing/get_scaling_square.c
+46
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/real_fft.h
.../webrtc/common_audio/signal_processing/include/real_fft.h
+96
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/signal_processing_library.h
...dio/signal_processing/include/signal_processing_library.h
+1612
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/spl_inl.h
...y/webrtc/common_audio/signal_processing/include/spl_inl.h
+153
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/min_max_operations.c
...ebrtc/common_audio/signal_processing/min_max_operations.c
+224
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_48khz.c
...ty/webrtc/common_audio/signal_processing/resample_48khz.c
+186
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.c
...c/common_audio/signal_processing/resample_by_2_internal.c
+689
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.h
...c/common_audio/signal_processing/resample_by_2_internal.h
+60
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_fractional.c
...brtc/common_audio/signal_processing/resample_fractional.c
+239
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_init.c
...rd_party/webrtc/common_audio/signal_processing/spl_init.c
+133
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_inl.c
...ird_party/webrtc/common_audio/signal_processing/spl_inl.c
+24
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_sqrt.c
...rd_party/webrtc/common_audio/signal_processing/spl_sqrt.c
+194
-0
cpp_onnx/third_party/webrtc/common_audio/signal_processing/vector_scaling_operations.c
...ommon_audio/signal_processing/vector_scaling_operations.c
+165
-0
cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
.../common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
+77
-0
cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
.../common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
+29
-0
cpp_onnx/third_party/webrtc/common_audio/vad/include/webrtc_vad.h
.../third_party/webrtc/common_audio/vad/include/webrtc_vad.h
+87
-0
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.c
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.c
+685
-0
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h
+114
-0
cpp_onnx/third_party/webrtc/common_audio/vad/vad_filterbank.c
...onnx/third_party/webrtc/common_audio/vad/vad_filterbank.c
+329
-0
No files found.
cpp_onnx/third_party/webrtc/common_audio/signal_processing/energy.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Energy().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
/*
 * Sums the squares of the samples in |vector|, right-shifting every square
 * by a common scaling before it is accumulated.
 *
 * Input:
 *      - vector        : Samples to accumulate.
 *      - vector_length : Number of samples in |vector|.
 *
 * Output:
 *      - scale_factor  : Receives the right shift that was applied to each
 *                        squared sample, as obtained from
 *                        WebRtcSpl_GetScalingSquare().
 *
 * Return value         : Sum of the shifted squares.
 */
int32_t WebRtcSpl_Energy(int16_t* vector,
                         size_t vector_length,
                         int* scale_factor)
{
    /* The vector length is also passed as the number of terms to be summed. */
    const int shift =
        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t energy = 0;
    size_t k;

    for (k = 0; k < vector_length; ++k)
    {
        energy += (vector[k] * vector[k]) >> shift;
    }

    *scale_factor = shift;
    return energy;
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/get_scaling_square.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetScalingSquare().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
/*
 * Derives a right-shift count from the largest absolute sample in
 * |in_vector| and from |times|.
 *
 * Input:
 *      - in_vector        : Samples to inspect.
 *      - in_vector_length : Number of samples in |in_vector|.
 *      - times            : Value passed (truncated to 32 bits) to
 *                           WebRtcSpl_GetSizeInBits(); callers such as
 *                           WebRtcSpl_Energy() pass the number of squared
 *                           terms they are about to sum.
 *
 * Return value            : 0 when the largest absolute sample is 0 or when
 *                           WebRtcSpl_NormW32(max * max) exceeds the bit
 *                           size of |times|; otherwise the bit size of
 *                           |times| minus that norm.
 *
 * The absolute value and the running maximum are kept in 32 bits. In 16
 * bits, a sample of -32768 negates to 32768, which does not fit in int16_t
 * and wrapped back to a negative number, so such samples were ignored and
 * the returned scaling was too small.
 */
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
    int32_t smax = -1;  /* Stays -1 for an empty vector, as before. */
    int16_t t;
    size_t i;

    for (i = 0; i < in_vector_length; i++)
    {
        int32_t sabs = in_vector[i];
        if (sabs < 0)
        {
            sabs = -sabs;  /* At most 32768, which fits in int32_t. */
        }
        if (sabs > smax)
        {
            smax = sabs;
        }
    }

    if (smax == 0)
    {
        /* All samples are zero; handled explicitly rather than via norm(0). */
        return 0;
    }

    /* smax * smax is at most 2^30 and therefore fits in int32_t. */
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
    return (t > nbits) ? 0 : nbits - t;
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/real_fft.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#include <stdint.h>
// Largest FFT order supported by ComplexFFT() is 10;
// WebRTC APM itself only uses orders 7 and 8.
enum { kMaxFFTOrder = 10 };
struct
RealFFT
;
#ifdef __cplusplus
extern
"C"
{
#endif
struct
RealFFT
*
WebRtcSpl_CreateRealFFT
(
int
order
);
void
WebRtcSpl_FreeRealFFT
(
struct
RealFFT
*
self
);
// Compute an FFT for a real-valued signal of length of 2^order,
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
// specification structure, which must be initialized prior to calling the FFT
// function with WebRtcSpl_CreateRealFFT().
// The relationship between the input and output sequences can
// be expressed in terms of the DFT, i.e.:
// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
// n=0,1,2,...N-1
// N=2^order.
// The conjugate-symmetric output sequence is represented using a CCS vector,
// which is of length N+2, and is organized as follows:
// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
// where R[n] and I[n], respectively, denote the real and imaginary components
// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
// the foldover frequency.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// real_data_in - the input signal. For an ARM Neon platform, it must be
// aligned on a 32-byte boundary.
//
// Output Arguments:
// complex_data_out - the output complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be different
// from real_data_in, and aligned on a 32-byte boundary.
//
// Return Value:
// 0 - FFT calculation is successful.
// -1 - Error with bad arguments (null pointers).
int
WebRtcSpl_RealForwardFFT
(
struct
RealFFT
*
self
,
const
int16_t
*
real_data_in
,
int16_t
*
complex_data_out
);
// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
// the specification structure, which must be initialized prior to calling the
// FFT function with WebRtcSpl_CreateRealFFT().
// For a transform of length M, the input sequence is represented using a packed
// CCS vector of length M+2, which is explained in the comments for
// WebRtcSpl_RealForwardFFT above.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// complex_data_in - the input complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be aligned on
// a 32-byte boundary.
//
// Output Arguments:
// real_data_out - the output real signal. For an ARM Neon platform, it must
// be different to complex_data_in, and aligned on a 32-byte
// boundary.
//
// Return Value:
// 0 or a positive number - a value that the elements in the |real_data_out|
// should be shifted left with in order to get
// correct physical values.
// -1 - Error with bad arguments (null pointers).
int
WebRtcSpl_RealInverseFFT
(
struct
RealFFT
*
self
,
const
int16_t
*
complex_data_in
,
int16_t
*
real_data_out
);
#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/signal_processing_library.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes all of the fix point signal processing library
* (SPL) function descriptions and declarations. For specific function calls,
* see bottom of file.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
#include <string.h>
#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
// Macros specific for the fixed point implementation
#define WEBRTC_SPL_WORD16_MAX 32767
#define WEBRTC_SPL_WORD16_MIN -32768
#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff
#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000
#define WEBRTC_SPL_MAX_LPC_ORDER 14
// Arguments are parenthesized so that compound expressions (e.g. `x | y`)
// expand correctly. Each argument may still be evaluated more than once, so
// avoid arguments with side effects.
#define WEBRTC_SPL_MIN(A, B) ((A) < (B) ? (A) : (B)) // Get min value
#define WEBRTC_SPL_MAX(A, B) ((A) > (B) ? (A) : (B)) // Get max value
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
#define WEBRTC_SPL_ABS_W16(a) (((int16_t)a >= 0) ? ((int16_t)a) : -((int16_t)a))
#define WEBRTC_SPL_ABS_W32(a) (((int32_t)a >= 0) ? ((int32_t)a) : -((int32_t)a))
#define WEBRTC_SPL_MUL(a, b) ((int32_t)((int32_t)(a) * (int32_t)(b)))
#define WEBRTC_SPL_UMUL(a, b) ((uint32_t)((uint32_t)(a) * (uint32_t)(b)))
#define WEBRTC_SPL_UMUL_32_16(a, b) ((uint32_t)((uint32_t)(a) * (uint16_t)(b)))
#define WEBRTC_SPL_MUL_16_U16(a, b) ((int32_t)(int16_t)(a) * (uint16_t)(b))
// clang-format off
// clang-format would choose some indentation
// leading to presubmit error (cpplint.py)
#ifndef WEBRTC_ARCH_ARM_V7
// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
#ifndef MIPS32_LE
// For MIPS platforms, these are inline functions in spl_inl_mips.h
#define WEBRTC_SPL_MUL_16_16(a, b) ((int32_t)(((int16_t)(a)) * ((int16_t)(b))))
#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
(WEBRTC_SPL_MUL_16_16(a, b >> 16) + \
((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15))
#endif
#endif
#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
(WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 5) + \
(((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10))
#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \
(WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 2) + \
(((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13))
#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \
((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 1)) + \
(((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14))
// clang-format on
#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) (WEBRTC_SPL_MUL_16_16(a, b) >> (c))
#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \
((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t)(((int32_t)1) << ((c)-1)))) >> (c))
// C + the 32 most significant bits of A * B
#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \
(C + (B >> 16) * A + (((uint32_t)(B & 0x0000FFFF) * A) >> 16))
// Saturates b to the range [c, a]: yields a when b > a, c when b < c, and b
// otherwise. Arguments are parenthesized so that compound expressions expand
// correctly; each argument may still be evaluated more than once.
#define WEBRTC_SPL_SAT(a, b, c) ((b) > (a) ? (a) : (b) < (c) ? (c) : (b))
// Shifting with negative numbers allowed
// Positive means left shift
#define WEBRTC_SPL_SHIFT_W32(x, c) ((c) >= 0 ? (x) * (1 << (c)) : (x) >> -(c))
// Shifting with negative numbers not allowed
// We cannot do casting here due to signed/unsigned problem
#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c))
#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c))
#define WEBRTC_SPL_RAND(a) ((int16_t)((((int16_t)a * 18816) >> 7) & 0x00007fff))
#ifdef __cplusplus
extern
"C"
{
#endif
#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \
memcpy(v1, v2, (length) * sizeof(int16_t))
// inline functions:
#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
// third party math functions
#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
// Initialize SPL. Currently it contains only function pointer initialization.
// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined),
// the pointers will be assigned to code optimized for Neon; otherwise, generic
// C code will be assigned.
// Note that this function MUST be called in any application that uses SPL
// functions.
void
WebRtcSpl_Init
(
void
);
int16_t
WebRtcSpl_GetScalingSquare
(
int16_t
*
in_vector
,
size_t
in_vector_length
,
size_t
times
);
// Copy and set operations. Implementation in copy_set_operations.c.
// Descriptions at bottom of file.
void
WebRtcSpl_MemSetW16
(
int16_t
*
vector
,
int16_t
set_value
,
size_t
vector_length
);
void
WebRtcSpl_MemSetW32
(
int32_t
*
vector
,
int32_t
set_value
,
size_t
vector_length
);
void
WebRtcSpl_MemCpyReversedOrder
(
int16_t
*
out_vector
,
int16_t
*
in_vector
,
size_t
vector_length
);
void
WebRtcSpl_CopyFromEndW16
(
const
int16_t
*
in_vector
,
size_t
in_vector_length
,
size_t
samples
,
int16_t
*
out_vector
);
void
WebRtcSpl_ZerosArrayW16
(
int16_t
*
vector
,
size_t
vector_length
);
void
WebRtcSpl_ZerosArrayW32
(
int32_t
*
vector
,
size_t
vector_length
);
// End: Copy and set operations.
// Minimum and maximum operation functions and their pointers.
// Implementation in min_max_operations.c.
// Returns the largest absolute value in a signed 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
typedef
int16_t
(
*
MaxAbsValueW16
)(
const
int16_t
*
vector
,
size_t
length
);
extern
MaxAbsValueW16
WebRtcSpl_MaxAbsValueW16
;
int16_t
WebRtcSpl_MaxAbsValueW16C
(
const
int16_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int16_t
WebRtcSpl_MaxAbsValueW16Neon
(
const
int16_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS32_LE)
int16_t
WebRtcSpl_MaxAbsValueW16_mips
(
const
int16_t
*
vector
,
size_t
length
);
#endif
// Returns the largest absolute value in a signed 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
typedef
int32_t
(
*
MaxAbsValueW32
)(
const
int32_t
*
vector
,
size_t
length
);
extern
MaxAbsValueW32
WebRtcSpl_MaxAbsValueW32
;
int32_t
WebRtcSpl_MaxAbsValueW32C
(
const
int32_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int32_t
WebRtcSpl_MaxAbsValueW32Neon
(
const
int32_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS_DSP_R1_LE)
int32_t
WebRtcSpl_MaxAbsValueW32_mips
(
const
int32_t
*
vector
,
size_t
length
);
#endif
// Returns the maximum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
typedef
int16_t
(
*
MaxValueW16
)(
const
int16_t
*
vector
,
size_t
length
);
extern
MaxValueW16
WebRtcSpl_MaxValueW16
;
int16_t
WebRtcSpl_MaxValueW16C
(
const
int16_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int16_t
WebRtcSpl_MaxValueW16Neon
(
const
int16_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS32_LE)
int16_t
WebRtcSpl_MaxValueW16_mips
(
const
int16_t
*
vector
,
size_t
length
);
#endif
// Returns the maximum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
typedef
int32_t
(
*
MaxValueW32
)(
const
int32_t
*
vector
,
size_t
length
);
extern
MaxValueW32
WebRtcSpl_MaxValueW32
;
int32_t
WebRtcSpl_MaxValueW32C
(
const
int32_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int32_t
WebRtcSpl_MaxValueW32Neon
(
const
int32_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS32_LE)
int32_t
WebRtcSpl_MaxValueW32_mips
(
const
int32_t
*
vector
,
size_t
length
);
#endif
// Returns the minimum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
typedef
int16_t
(
*
MinValueW16
)(
const
int16_t
*
vector
,
size_t
length
);
extern
MinValueW16
WebRtcSpl_MinValueW16
;
int16_t
WebRtcSpl_MinValueW16C
(
const
int16_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int16_t
WebRtcSpl_MinValueW16Neon
(
const
int16_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS32_LE)
int16_t
WebRtcSpl_MinValueW16_mips
(
const
int16_t
*
vector
,
size_t
length
);
#endif
// Returns the minimum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
typedef
int32_t
(
*
MinValueW32
)(
const
int32_t
*
vector
,
size_t
length
);
extern
MinValueW32
WebRtcSpl_MinValueW32
;
int32_t
WebRtcSpl_MinValueW32C
(
const
int32_t
*
vector
,
size_t
length
);
#if defined(WEBRTC_HAS_NEON)
int32_t
WebRtcSpl_MinValueW32Neon
(
const
int32_t
*
vector
,
size_t
length
);
#endif
#if defined(MIPS32_LE)
int32_t
WebRtcSpl_MinValueW32_mips
(
const
int32_t
*
vector
,
size_t
length
);
#endif
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum absolute value in vector.
// If there are multiple equal maxima, return the index of the
// first. -32768 will always have precedence over 32767 (despite
// -32768 presenting an int16 absolute value of 32767).
size_t
WebRtcSpl_MaxAbsIndexW16
(
const
int16_t
*
vector
,
size_t
length
);
// Returns the vector index to the maximum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector (if multiple
// indexes have the maximum, return the first).
size_t
WebRtcSpl_MaxIndexW16
(
const
int16_t
*
vector
,
size_t
length
);
// Returns the vector index to the maximum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector (if multiple
// indexes have the maximum, return the first).
size_t
WebRtcSpl_MaxIndexW32
(
const
int32_t
*
vector
,
size_t
length
);
// Returns the vector index to the minimum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector (if multiple
// indexes have the minimum, return the first).
size_t
WebRtcSpl_MinIndexW16
(
const
int16_t
*
vector
,
size_t
length
);
// Returns the vector index to the minimum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector (if multiple
// indexes have the minimum, return the first).
size_t
WebRtcSpl_MinIndexW32
(
const
int32_t
*
vector
,
size_t
length
);
// End: Minimum and maximum operations.
// Vector scaling operations. Implementation in vector_scaling_operations.c.
// Description at bottom of file.
void
WebRtcSpl_VectorBitShiftW16
(
int16_t
*
out_vector
,
size_t
vector_length
,
const
int16_t
*
in_vector
,
int16_t
right_shifts
);
void
WebRtcSpl_VectorBitShiftW32
(
int32_t
*
out_vector
,
size_t
vector_length
,
const
int32_t
*
in_vector
,
int16_t
right_shifts
);
void
WebRtcSpl_VectorBitShiftW32ToW16
(
int16_t
*
out_vector
,
size_t
vector_length
,
const
int32_t
*
in_vector
,
int
right_shifts
);
void
WebRtcSpl_ScaleVector
(
const
int16_t
*
in_vector
,
int16_t
*
out_vector
,
int16_t
gain
,
size_t
vector_length
,
int16_t
right_shifts
);
void
WebRtcSpl_ScaleVectorWithSat
(
const
int16_t
*
in_vector
,
int16_t
*
out_vector
,
int16_t
gain
,
size_t
vector_length
,
int16_t
right_shifts
);
void
WebRtcSpl_ScaleAndAddVectors
(
const
int16_t
*
in_vector1
,
int16_t
gain1
,
int
right_shifts1
,
const
int16_t
*
in_vector2
,
int16_t
gain2
,
int
right_shifts2
,
int16_t
*
out_vector
,
size_t
vector_length
);
// The functions (with related pointer) perform the vector operation:
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
// + round_value) >> right_shifts,
// where round_value = (1 << right_shifts) >> 1.
//
// Input:
// - in_vector1 : Input vector 1
// - in_vector1_scale : Gain to be used for vector 1
// - in_vector2 : Input vector 2
// - in_vector2_scale : Gain to be used for vector 2
// - right_shifts : Number of right bit shifts to be applied
// - length : Number of elements in the input vectors
//
// Output:
// - out_vector : Output vector
// Return value : 0 if OK, -1 if (in_vector1 == null
// || in_vector2 == null || out_vector == null
// || length <= 0 || right_shifts < 0).
typedef
int
(
*
ScaleAndAddVectorsWithRound
)(
const
int16_t
*
in_vector1
,
int16_t
in_vector1_scale
,
const
int16_t
*
in_vector2
,
int16_t
in_vector2_scale
,
int
right_shifts
,
int16_t
*
out_vector
,
size_t
length
);
extern
ScaleAndAddVectorsWithRound
WebRtcSpl_ScaleAndAddVectorsWithRound
;
int
WebRtcSpl_ScaleAndAddVectorsWithRoundC
(
const
int16_t
*
in_vector1
,
int16_t
in_vector1_scale
,
const
int16_t
*
in_vector2
,
int16_t
in_vector2_scale
,
int
right_shifts
,
int16_t
*
out_vector
,
size_t
length
);
#if defined(MIPS_DSP_R1_LE)
int
WebRtcSpl_ScaleAndAddVectorsWithRound_mips
(
const
int16_t
*
in_vector1
,
int16_t
in_vector1_scale
,
const
int16_t
*
in_vector2
,
int16_t
in_vector2_scale
,
int
right_shifts
,
int16_t
*
out_vector
,
size_t
length
);
#endif
// End: Vector scaling operations.
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
// Description at bottom of file.
void
WebRtcSpl_ReverseOrderMultArrayElements
(
int16_t
*
out_vector
,
const
int16_t
*
in_vector
,
const
int16_t
*
window
,
size_t
vector_length
,
int16_t
right_shifts
);
void
WebRtcSpl_ElementwiseVectorMult
(
int16_t
*
out_vector
,
const
int16_t
*
in_vector
,
const
int16_t
*
window
,
size_t
vector_length
,
int16_t
right_shifts
);
void
WebRtcSpl_AddVectorsAndShift
(
int16_t
*
out_vector
,
const
int16_t
*
in_vector1
,
const
int16_t
*
in_vector2
,
size_t
vector_length
,
int16_t
right_shifts
);
void
WebRtcSpl_AddAffineVectorToVector
(
int16_t
*
out_vector
,
int16_t
*
in_vector
,
int16_t
gain
,
int32_t
add_constant
,
int16_t
right_shifts
,
size_t
vector_length
);
void
WebRtcSpl_AffineTransformVector
(
int16_t
*
out_vector
,
int16_t
*
in_vector
,
int16_t
gain
,
int32_t
add_constant
,
int16_t
right_shifts
,
size_t
vector_length
);
// End: iLBC specific functions.
// Signal processing operations.
// A 32-bit fix-point implementation of auto-correlation computation
//
// Input:
// - in_vector : Vector to calculate autocorrelation upon
// - in_vector_length : Length (in samples) of |in_vector|
// - order : The order up to which the autocorrelation should be
// calculated
//
// Output:
// - result : auto-correlation values (values should be seen
// relative to each other since the absolute values
// might have been down shifted to avoid overflow)
//
// - scale : The number of left shifts required to obtain the
// auto-correlation in Q0
//
// Return value : Number of samples in |result|, i.e. (order+1)
size_t
WebRtcSpl_AutoCorrelation
(
const
int16_t
*
in_vector
,
size_t
in_vector_length
,
size_t
order
,
int32_t
*
result
,
int
*
scale
);
// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
// does NOT use the 64 bit class
//
// Input:
// - auto_corr : Vector with autocorrelation values of length >= |order|+1
// - order : The LPC filter order (support up to order 20)
//
// Output:
// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12
// - refl_coef : refl_coef[0...order-1] Reflection coefficients in Q15
//
// Return value : 1 for stable 0 for unstable
int16_t
WebRtcSpl_LevinsonDurbin
(
const
int32_t
*
auto_corr
,
int16_t
*
lpc_coef
,
int16_t
*
refl_coef
,
size_t
order
);
// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
// This version is a 16 bit operation.
//
// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
// "rare" cases even if the reflection coefficients are stable.
//
// Input:
// - refl_coef : Reflection coefficients in Q15 that should be converted
// to LPC coefficients
// - use_order : Number of coefficients in |refl_coef|
//
// Output:
// - lpc_coef : LPC coefficients in Q12
void
WebRtcSpl_ReflCoefToLpc
(
const
int16_t
*
refl_coef
,
int
use_order
,
int16_t
*
lpc_coef
);
// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
// This version is a 16 bit operation.
// The conversion is implemented by the step-down algorithm.
//
// Input:
// - lpc_coef : LPC coefficients in Q12, that should be converted to
// reflection coefficients
// - use_order : Number of coefficients in |lpc_coef|
//
// Output:
// - refl_coef : Reflection coefficients in Q15.
void
WebRtcSpl_LpcToReflCoef
(
int16_t
*
lpc_coef
,
int
use_order
,
int16_t
*
refl_coef
);
// Calculates reflection coefficients (16 bit) from auto-correlation values
//
// Input:
// - auto_corr : Auto-correlation values
// - use_order : Number of coefficients to be calculated
//
// Output:
// - refl_coef : Reflection coefficients in Q15.
void
WebRtcSpl_AutoCorrToReflCoef
(
const
int32_t
*
auto_corr
,
int
use_order
,
int16_t
*
refl_coef
);
// The functions (with related pointer) calculate the cross-correlation between
// two sequences |seq1| and |seq2|.
// |seq1| is fixed and |seq2| slides as the pointer is increased with the
// amount |step_seq2|. Note the arguments should obey the relationship:
// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
// buffer size of |seq2|
//
// Input:
// - seq1 : First sequence (fixed throughout the correlation)
// - seq2 : Second sequence (slides |step_seq2| for each
// new correlation)
// - dim_seq : Number of samples to use in the cross-correlation
// - dim_cross_correlation : Number of cross-correlations to calculate (the
// start position for |seq2| is updated for each
// new one)
// - right_shifts : Number of right bit shifts to use. This will
// become the output Q-domain.
// - step_seq2 : How many (positive or negative) steps the
// |seq2| pointer should be updated for each new
// cross-correlation value.
//
// Output:
// - cross_correlation : The cross-correlation in Q(-right_shifts)
typedef
void
(
*
CrossCorrelation
)(
int32_t
*
cross_correlation
,
const
int16_t
*
seq1
,
const
int16_t
*
seq2
,
size_t
dim_seq
,
size_t
dim_cross_correlation
,
int
right_shifts
,
int
step_seq2
);
extern
CrossCorrelation
WebRtcSpl_CrossCorrelation
;
void
WebRtcSpl_CrossCorrelationC
(
int32_t
*
cross_correlation
,
const
int16_t
*
seq1
,
const
int16_t
*
seq2
,
size_t
dim_seq
,
size_t
dim_cross_correlation
,
int
right_shifts
,
int
step_seq2
);
#if defined(WEBRTC_HAS_NEON)
void
WebRtcSpl_CrossCorrelationNeon
(
int32_t
*
cross_correlation
,
const
int16_t
*
seq1
,
const
int16_t
*
seq2
,
size_t
dim_seq
,
size_t
dim_cross_correlation
,
int
right_shifts
,
int
step_seq2
);
#endif
#if defined(MIPS32_LE)
void
WebRtcSpl_CrossCorrelation_mips
(
int32_t
*
cross_correlation
,
const
int16_t
*
seq1
,
const
int16_t
*
seq2
,
size_t
dim_seq
,
size_t
dim_cross_correlation
,
int
right_shifts
,
int
step_seq2
);
#endif
// Creates (the first half of) a Hanning window. Size must be at least 1 and
// at most 512.
//
// Input:
// - size : Length of the requested Hanning window (1 to 512)
//
// Output:
// - window : Hanning vector in Q14.
void
WebRtcSpl_GetHanningWindow
(
int16_t
*
window
,
size_t
size
);
// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
// |in_vector|. Input and output values are in Q15.
//
// Inputs:
// - in_vector : Values to calculate sqrt(1 - x^2) of
// - vector_length : Length of vector |in_vector|
//
// Output:
// - out_vector : Output values in Q15
void
WebRtcSpl_SqrtOfOneMinusXSquared
(
int16_t
*
in_vector
,
size_t
vector_length
,
int16_t
*
out_vector
);
// End: Signal processing operations.
// Randomization functions. Implementations collected in
// randomization_functions.c and descriptions at bottom of this file.
int16_t
WebRtcSpl_RandU
(
uint32_t
*
seed
);
int16_t
WebRtcSpl_RandN
(
uint32_t
*
seed
);
int16_t
WebRtcSpl_RandUArray
(
int16_t
*
vector
,
int16_t
vector_length
,
uint32_t
*
seed
);
// End: Randomization functions.
// Math functions
int32_t
WebRtcSpl_Sqrt
(
int32_t
value
);
// Divisions. Implementations collected in division_operations.c and
// descriptions at bottom of this file.
uint32_t
WebRtcSpl_DivU32U16
(
uint32_t
num
,
uint16_t
den
);
int32_t
WebRtcSpl_DivW32W16
(
int32_t
num
,
int16_t
den
);
int16_t
WebRtcSpl_DivW32W16ResW16
(
int32_t
num
,
int16_t
den
);
int32_t
WebRtcSpl_DivResultInQ31
(
int32_t
num
,
int32_t
den
);
int32_t
WebRtcSpl_DivW32HiLow
(
int32_t
num
,
int16_t
den_hi
,
int16_t
den_low
);
// End: Divisions.
int32_t
WebRtcSpl_Energy
(
int16_t
*
vector
,
size_t
vector_length
,
int
*
scale_factor
);
// Filter operations.
size_t
WebRtcSpl_FilterAR
(
const
int16_t
*
ar_coef
,
size_t
ar_coef_length
,
const
int16_t
*
in_vector
,
size_t
in_vector_length
,
int16_t
*
filter_state
,
size_t
filter_state_length
,
int16_t
*
filter_state_low
,
size_t
filter_state_low_length
,
int16_t
*
out_vector
,
int16_t
*
out_vector_low
,
size_t
out_vector_low_length
);
// WebRtcSpl_FilterMAFastQ12(...)
//
// Performs a MA filtering on a vector in Q12
//
// Input:
// - in_vector : Input samples (state in positions
// in_vector[-order] .. in_vector[-1])
// - ma_coef : Filter coefficients (in Q12)
// - ma_coef_length : Number of B coefficients (order+1)
// - vector_length : Number of samples to be filtered
//
// Output:
// - out_vector : Filtered samples
//
void
WebRtcSpl_FilterMAFastQ12
(
const
int16_t
*
in_vector
,
int16_t
*
out_vector
,
const
int16_t
*
ma_coef
,
size_t
ma_coef_length
,
size_t
vector_length
);
// Performs a AR filtering on a vector in Q12
// Input:
// - data_in : Input samples
// - data_out : State information in positions
// data_out[-order] .. data_out[-1]
// - coefficients : Filter coefficients (in Q12)
// - coefficients_length: Number of coefficients (order+1)
// - data_length : Number of samples to be filtered
// Output:
// - data_out : Filtered samples
void WebRtcSpl_FilterARFastQ12(const int16_t *data_in,
                               int16_t *data_out,
                               const int16_t *__restrict coefficients,
                               size_t coefficients_length,
                               size_t data_length);
// The functions (with related pointer) perform a MA down sampling filter
// on a vector.
// Input:
// - data_in : Input samples (state in positions
// data_in[-order] .. data_in[-1])
// - data_in_length : Number of samples in |data_in| to be filtered.
// This must be at least
// |delay| + |factor|*(|data_out_length|-1) + 1
// - data_out_length : Number of down sampled samples desired
// - coefficients : Filter coefficients (in Q12)
// - coefficients_length: Number of coefficients (order+1)
// - factor : Decimation factor
// - delay : Delay of filter (compensated for in |data_out|)
// Output:
// - data_out : Filtered samples
// Return value : 0 if OK, -1 if |data_in| is too short
// Common signature of all MA down-sampling implementations declared below.
typedef int (*DownsampleFast)(const int16_t *data_in,
                              size_t data_in_length,
                              int16_t *data_out,
                              size_t data_out_length,
                              const int16_t *__restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay);
// Pointer to the implementation in use.
// NOTE(review): presumably assigned to one of the variants below during
// library initialization - confirm in spl_init.c.
extern DownsampleFast WebRtcSpl_DownsampleFast;
// Generic C implementation.
int WebRtcSpl_DownsampleFastC(const int16_t *data_in,
                              size_t data_in_length,
                              int16_t *data_out,
                              size_t data_out_length,
                              const int16_t *__restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay);
#if defined(WEBRTC_HAS_NEON)
// Variant declared only for builds with WEBRTC_HAS_NEON.
int WebRtcSpl_DownsampleFastNeon(const int16_t *data_in,
                                 size_t data_in_length,
                                 int16_t *data_out,
                                 size_t data_out_length,
                                 const int16_t *__restrict coefficients,
                                 size_t coefficients_length,
                                 int factor,
                                 size_t delay);
#endif
#if defined(MIPS32_LE)
// Variant declared only for builds with MIPS32_LE.
int WebRtcSpl_DownsampleFast_mips(const int16_t *data_in,
                                  size_t data_in_length,
                                  int16_t *data_out,
                                  size_t data_out_length,
                                  const int16_t *__restrict coefficients,
                                  size_t coefficients_length,
                                  int factor,
                                  size_t delay);
#endif
// End: Filter operations.
// FFT operations
// In-place complex FFT and inverse FFT over 2^|stages| interleaved [Re Im]
// pairs held in |vector|. |mode| selects the low-complexity (0) or
// high-accuracy (1) variant; a return value of -1 signals an error.
int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
// values, and swap elements whose indexes are bit-reverses of each other.
//
// Input:
// - complex_data : Complex data buffer containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary
// elements: [Re Im Re Im Re Im....]
// - stages : Number of FFT stages. Must be at least 3 and at most
// 10, since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// Output:
// - complex_data : The complex data buffer.
void WebRtcSpl_ComplexBitReverse(int16_t *__restrict complex_data, int stages);
// End: FFT operations
/************************************************************
*
* RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW
*
************************************************************/
/*******************************************************************
* resample.c
*
* Includes the following resampling combinations
* 22 kHz -> 16 kHz
* 16 kHz -> 22 kHz
* 22 kHz -> 8 kHz
* 8 kHz -> 22 kHz
*
******************************************************************/
// state structure for 22 -> 16 resampler
// NOTE(review): the field names suggest one state buffer per internal stage
// (22->44, 44->32, 32->16 kHz) - confirm against resample.c.
typedef struct {
  int32_t S_22_44[8];
  int32_t S_44_32[8];
  int32_t S_32_16[8];
} WebRtcSpl_State22khzTo16khz;

// Resamples |in| (22 kHz) into |out| (16 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample22khzTo16khz(const int16_t *in,
                                    int16_t *out,
                                    WebRtcSpl_State22khzTo16khz *state,
                                    int32_t *tmpmem);

// Resets |state| for the 22 -> 16 kHz resampler.
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz *state);
// state structure for 16 -> 22 resampler
// NOTE(review): the field names suggest one state buffer per internal stage
// (16->32, 32->22 kHz) - confirm against resample.c.
typedef struct {
  int32_t S_16_32[8];
  int32_t S_32_22[8];
} WebRtcSpl_State16khzTo22khz;

// Resamples |in| (16 kHz) into |out| (22 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample16khzTo22khz(const int16_t *in,
                                    int16_t *out,
                                    WebRtcSpl_State16khzTo22khz *state,
                                    int32_t *tmpmem);

// Resets |state| for the 16 -> 22 kHz resampler.
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz *state);
// state structure for 22 -> 8 resampler
// NOTE(review): the field names suggest one state buffer per internal stage
// (22->22, 22->16, 16->8 kHz) - confirm against resample.c.
typedef struct {
  int32_t S_22_22[16];
  int32_t S_22_16[8];
  int32_t S_16_8[8];
} WebRtcSpl_State22khzTo8khz;

// Resamples |in| (22 kHz) into |out| (8 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample22khzTo8khz(const int16_t *in,
                                   int16_t *out,
                                   WebRtcSpl_State22khzTo8khz *state,
                                   int32_t *tmpmem);

// Resets |state| for the 22 -> 8 kHz resampler.
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz *state);
// state structure for 8 -> 22 resampler
// NOTE(review): the field names suggest one state buffer per internal stage
// (8->16, 16->11, 11->22 kHz) - confirm against resample.c.
typedef struct {
  int32_t S_8_16[8];
  int32_t S_16_11[8];
  int32_t S_11_22[8];
} WebRtcSpl_State8khzTo22khz;

// Resamples |in| (8 kHz) into |out| (22 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample8khzTo22khz(const int16_t *in,
                                   int16_t *out,
                                   WebRtcSpl_State8khzTo22khz *state,
                                   int32_t *tmpmem);

// Resets |state| for the 8 -> 22 kHz resampler.
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz *state);
/*******************************************************************
* resample_fractional.c
* Functions for internal use in the other resample functions
*
* Includes the following resampling combinations
* 48 kHz -> 32 kHz
* 32 kHz -> 24 kHz
* 44 kHz -> 32 kHz
*
******************************************************************/
// Fractional resamplers operating on 32-bit samples.
// NOTE(review): the meaning of |K| is not documented here - confirm in
// resample_fractional.c.
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K);
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K);
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K);
/*******************************************************************
* resample_48khz.c
*
* Includes the following resampling combinations
* 48 kHz -> 16 kHz
* 16 kHz -> 48 kHz
* 48 kHz -> 8 kHz
* 8 kHz -> 48 kHz
*
******************************************************************/
// State for the 48 -> 16 kHz resampler.
// NOTE(review): the field names suggest one state buffer per internal stage
// (48->48, 48->32, 32->16 kHz) - confirm against resample_48khz.c.
typedef struct {
  int32_t S_48_48[16];
  int32_t S_48_32[8];
  int32_t S_32_16[8];
} WebRtcSpl_State48khzTo16khz;

// Resamples |in| (48 kHz) into |out| (16 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample48khzTo16khz(const int16_t *in,
                                    int16_t *out,
                                    WebRtcSpl_State48khzTo16khz *state,
                                    int32_t *tmpmem);

// Resets |state| for the 48 -> 16 kHz resampler.
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz *state);
// State for the 16 -> 48 kHz resampler.
// NOTE(review): the field names suggest one state buffer per internal stage
// (16->32, 32->24, 24->48 kHz) - confirm against resample_48khz.c.
typedef struct {
  int32_t S_16_32[8];
  int32_t S_32_24[8];
  int32_t S_24_48[8];
} WebRtcSpl_State16khzTo48khz;

// Resamples |in| (16 kHz) into |out| (48 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample16khzTo48khz(const int16_t *in,
                                    int16_t *out,
                                    WebRtcSpl_State16khzTo48khz *state,
                                    int32_t *tmpmem);

// Resets |state| for the 16 -> 48 kHz resampler.
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz *state);
// State for the 48 -> 8 kHz resampler.
// NOTE(review): the field names suggest one state buffer per internal stage
// (48->24, 24->24, 24->16, 16->8 kHz) - confirm against resample_48khz.c.
typedef struct {
  int32_t S_48_24[8];
  int32_t S_24_24[16];
  int32_t S_24_16[8];
  int32_t S_16_8[8];
} WebRtcSpl_State48khzTo8khz;

// Resamples |in| (48 kHz) into |out| (8 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample48khzTo8khz(const int16_t *in,
                                   int16_t *out,
                                   WebRtcSpl_State48khzTo8khz *state,
                                   int32_t *tmpmem);

// Resets |state| for the 48 -> 8 kHz resampler.
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz *state);
// State for the 8 -> 48 kHz resampler.
// NOTE(review): the field names suggest one state buffer per internal stage
// (8->16, 16->12, 12->24, 24->48 kHz) - confirm against resample_48khz.c.
typedef struct {
  int32_t S_8_16[8];
  int32_t S_16_12[8];
  int32_t S_12_24[8];
  int32_t S_24_48[8];
} WebRtcSpl_State8khzTo48khz;

// Resamples |in| (8 kHz) into |out| (48 kHz) using |state| and the scratch
// buffer |tmpmem|.
void WebRtcSpl_Resample8khzTo48khz(const int16_t *in,
                                   int16_t *out,
                                   WebRtcSpl_State8khzTo48khz *state,
                                   int32_t *tmpmem);

// Resets |state| for the 8 -> 48 kHz resampler.
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz *state);
/*******************************************************************
* resample_by_2.c
*
* Includes down and up sampling by a factor of two.
*
******************************************************************/
// Down-samples the |len| samples of |in| by a factor of two into |out|.
// NOTE(review): |filtState| presumably carries filter memory between calls -
// confirm in resample_by_2.c.
void WebRtcSpl_DownsampleBy2(const int16_t *in,
                             size_t len,
                             int16_t *out,
                             int32_t *filtState);

// Up-samples the |len| samples of |in| by a factor of two into |out|.
// NOTE(review): |filtState| presumably carries filter memory between calls -
// confirm in resample_by_2.c.
void WebRtcSpl_UpsampleBy2(const int16_t *in,
                           size_t len,
                           int16_t *out,
                           int32_t *filtState);
/************************************************************
* END OF RESAMPLING FUNCTIONS
************************************************************/
// Splits |in_data| into a lower sub band (|low_band|) and an upper sub band
// (|high_band|). |filter_state1| and |filter_state2| hold the two all-pass
// filter states and are both read and updated.
void WebRtcSpl_AnalysisQMF(const int16_t *in_data,
                           size_t in_data_length,
                           int16_t *low_band,
                           int16_t *high_band,
                           int32_t *filter_state1,
                           int32_t *filter_state2);

// Recombines |low_band| and |high_band| (|band_length| samples each) into
// |out_data|. |filter_state1| and |filter_state2| hold the two all-pass
// filter states and are both read and updated.
void WebRtcSpl_SynthesisQMF(const int16_t *low_band,
                            const int16_t *high_band,
                            size_t band_length,
                            int16_t *out_data,
                            int32_t *filter_state1,
                            int32_t *filter_state2);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_
//
// WebRtcSpl_AddSatW16(...)
// WebRtcSpl_AddSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, addition of
// the numbers specified by the |var1| and |var2| parameters.
//
// Input:
// - var1 : Input variable 1
// - var2 : Input variable 2
//
// Return value : Added and saturated value
//
//
// WebRtcSpl_SubSatW16(...)
// WebRtcSpl_SubSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
// of the numbers specified by the |var1| and |var2| parameters.
//
// Input:
// - var1 : Input variable 1
// - var2 : Input variable 2
//
// Returned value : Subtracted and saturated value
//
//
// WebRtcSpl_GetSizeInBits(...)
//
// Returns the # of bits that are needed at the most to represent the number
// specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bits needed to represent |value|
//
//
// WebRtcSpl_NormW32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the 32-bit
// signed number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
//
//
// WebRtcSpl_NormW16(...)
//
// Norm returns the # of left shifts required to 16-bit normalize the 16-bit
// signed number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 16-bit normalize |value|
//
//
// WebRtcSpl_NormU32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the unsigned
// 32-bit number specified by the |value| parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
//
//
// WebRtcSpl_GetScalingSquare(...)
//
// Returns the # of bits required to scale the samples specified in the
// |in_vector| parameter so that, if the squares of the samples are added the
// # of times specified by the |times| parameter, the 32-bit addition will not
// overflow (result in int32_t).
//
// Input:
// - in_vector : Input vector to check scaling on
// - in_vector_length : Samples in |in_vector|
// - times : Number of additions to be performed
//
// Return value : Number of right bit shifts needed to avoid
// overflow in the addition calculation
//
//
// WebRtcSpl_MemSetW16(...)
//
// Sets all the values in the int16_t vector |vector| of length
// |vector_length| to the specified value |set_value|
//
// Input:
// - vector : Pointer to the int16_t vector
// - set_value : Value specified
// - vector_length : Length of vector
//
//
// WebRtcSpl_MemSetW32(...)
//
// Sets all the values in the int32_t vector |vector| of length
// |vector_length| to the specified value |set_value|
//
// Input:
// - vector : Pointer to the int32_t vector
// - set_value : Value specified
// - vector_length : Length of vector
//
//
// WebRtcSpl_MemCpyReversedOrder(...)
//
// Copies all the values from the source int16_t vector |in_vector| to a
// destination int16_t vector |out_vector|. It is done in reversed order,
// meaning that the first sample of |in_vector| is copied to the last sample of
// the |out_vector|. The procedure continues until the last sample of
// |in_vector| has been copied to the first sample of |out_vector|. This
// creates a reversed vector. Used in e.g. prediction in iLBC.
//
// Input:
// - in_vector : Pointer to the first sample in a int16_t vector
// of length |length|
// - vector_length : Number of elements to copy
//
// Output:
// - out_vector : Pointer to the last sample in a int16_t vector
// of length |length|
//
//
// WebRtcSpl_CopyFromEndW16(...)
//
// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
// to the vector |out_vector|.
//
// Input:
// - in_vector : Input vector
// - in_vector_length : Number of samples in |in_vector|
// - samples : Number of samples to extract (from right side)
// from |in_vector|
//
// Output:
// - out_vector : Vector with the requested samples
//
//
// WebRtcSpl_ZerosArrayW16(...)
// WebRtcSpl_ZerosArrayW32(...)
//
// Inserts the value "zero" in all positions of a w16 and a w32 vector
// respectively.
//
// Input:
// - vector_length : Number of samples in vector
//
// Output:
// - vector : Vector containing all zeros
//
//
// WebRtcSpl_VectorBitShiftW16(...)
// WebRtcSpl_VectorBitShiftW32(...)
//
// Bit shifts all the values in a vector up or downwards. Different calls for
// int16_t and int32_t vectors respectively.
//
// Input:
// - vector_length : Length of vector
// - in_vector : Pointer to the vector that should be bit shifted
// - right_shifts : Number of right bit shifts (negative value gives left
// shifts)
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
//
//
// WebRtcSpl_VectorBitShiftW32ToW16(...)
//
// Bit shifts all the values in a int32_t vector up or downwards and
// stores the result as an int16_t vector. The function will saturate the
// signal if needed, before storing in the output vector.
//
// Input:
// - vector_length : Length of vector
// - in_vector : Pointer to the vector that should be bit shifted
// - right_shifts : Number of right bit shifts (negative value gives left
// shifts)
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
//
//
// WebRtcSpl_ScaleVector(...)
//
// Performs the vector operation:
// out_vector[k] = (gain*in_vector[k])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
//
//
// WebRtcSpl_ScaleVectorWithSat(...)
//
// Performs the vector operation:
// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts )
//
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
//
//
// WebRtcSpl_ScaleAndAddVectors(...)
//
// Performs the vector operation:
// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1
// + (gain2*in_vector2[k])>>right_shifts2
//
// Input:
// - in_vector1 : Input vector 1
// - gain1 : Gain to be used for vector 1
// - right_shifts1 : Right bit shift to be used for vector 1
// - in_vector2 : Input vector 2
// - gain2 : Gain to be used for vector 2
// - right_shifts2 : Right bit shift to be used for vector 2
// - vector_length : Elements in the input vectors
//
// Output:
// - out_vector : Output vector
//
//
// WebRtcSpl_ReverseOrderMultArrayElements(...)
//
// Performs the vector operation:
// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - window : Window vector (should be reversed). The pointer
// should be set to the last value in the vector
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
//
//
// WebRtcSpl_ElementwiseVectorMult(...)
//
// Performs the vector operation:
// out_vector[n] = (in_vector[n]*window[n])>>right_shifts
//
// Input:
// - in_vector : Input vector
// - window : Window vector.
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
//
//
// WebRtcSpl_AddVectorsAndShift(...)
//
// Performs the vector operation:
// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts
//
// Input:
// - in_vector1 : Input vector 1
// - in_vector2 : Input vector 2
// - right_shifts : Number of right bit shift to be applied after the
// addition
// - vector_length : Number of elements in |in_vector1| and |in_vector2|
//
// Output:
// - out_vector : Output vector (can be same as |in_vector1|)
//
//
// WebRtcSpl_AddAffineVectorToVector(...)
//
// Adds an affine transformed vector to another vector |out_vector|, i.e,
// performs
// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Gain value, used to multiply the in vector with
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
//
// Output:
// - out_vector : Vector with the output
//
//
// WebRtcSpl_AffineTransformVector(...)
//
// Affine transforms a vector, i.e, performs
// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts
//
// Input:
// - in_vector : Input vector
// - gain : Gain value, used to multiply the in vector with
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
//
// Output:
// - out_vector : Vector with the output
//
//
// WebRtcSpl_IncreaseSeed(...)
//
// Increases the seed (and returns the new value)
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : The new seed value
//
//
// WebRtcSpl_RandU(...)
//
// Produces a uniformly distributed value in the int16_t range
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : Uniformly distributed value in the range
// [Word16_MIN...Word16_MAX]
//
//
// WebRtcSpl_RandN(...)
//
// Produces a normal distributed value in the int16_t range
//
// Input:
// - seed : Seed for random calculation
//
// Output:
// - seed : Updated seed value
//
// Return value : N(0,1) value in the Q13 domain
//
//
// WebRtcSpl_RandUArray(...)
//
// Produces a uniformly distributed vector with elements in the int16_t
// range
//
// Input:
// - vector_length : Samples wanted in the vector
// - seed : Seed for random calculation
//
// Output:
// - vector : Vector with the uniform values
// - seed : Updated seed value
//
// Return value : Number of samples in vector, i.e., |vector_length|
//
//
// WebRtcSpl_Sqrt(...)
//
// Returns the square root of the input value |value|. The precision of this
// function is integer precision, i.e., sqrt(8) gives 2 as answer.
// If |value| is a negative number then 0 is returned.
//
// Algorithm:
//
// A sixth order Taylor Series expansion is used here to compute the square
// root of a number y^0.5 = (1+x)^0.5
// where
// x = y-1
// = 1+(x/2)-0.5*(x/2)^2+0.5*(x/2)^3-0.625*(x/2)^4+0.875*(x/2)^5
// 0.5 <= x < 1
//
// Input:
// - value : Value to calculate sqrt of
//
// Return value : Result of the sqrt calculation
//
//
// WebRtcSpl_DivU32U16(...)
//
// Divides a uint32_t |num| by a uint16_t |den|.
//
// If |den|==0, (uint32_t)0xFFFFFFFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a uint32_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivW32W16(...)
//
// Divides a int32_t |num| by a int16_t |den|.
//
// If |den|==0, (int32_t)0x7FFFFFFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a int32_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivW32W16ResW16(...)
//
// Divides a int32_t |num| by a int16_t |den|, assuming that the
// result is less than 32768, otherwise an unpredictable result will occur.
//
// If |den|==0, (int16_t)0x7FFF is returned.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division (as a int16_t), i.e., the
// integer part of num/den.
//
//
// WebRtcSpl_DivResultInQ31(...)
//
// Divides an int32_t |num| by an int32_t |den|, assuming that the
// absolute value of the denominator is larger than the numerator, otherwise
// an unpredictable result will occur.
//
// Input:
// - num : Numerator
// - den : Denominator
//
// Return value : Result of the division in Q31.
//
//
// WebRtcSpl_DivW32HiLow(...)
//
// Divides a int32_t |num| by a denominator in hi, low format. The
// absolute value of the denominator has to be larger (or equal to) the
// numerator.
//
// Input:
// - num : Numerator
// - den_hi : High part of denominator
// - den_low : Low part of denominator
//
// Return value : Divided value in Q31
//
//
// WebRtcSpl_Energy(...)
//
// Calculates the energy of a vector
//
// Input:
// - vector : Vector which the energy should be calculated on
// - vector_length : Number of samples in vector
//
// Output:
// - scale_factor : Number of left bit shifts needed to get the physical
// energy value, i.e, to get the Q0 value
//
// Return value : Energy value in Q(-|scale_factor|)
//
//
// WebRtcSpl_FilterAR(...)
//
// Performs a 32-bit AR filtering on a vector in Q12
//
// Input:
// - ar_coef : AR-coefficient vector (values in Q12),
// ar_coef[0] must be 4096.
// - ar_coef_length : Number of coefficients in |ar_coef|.
// - in_vector : Vector to be filtered.
// - in_vector_length : Number of samples in |in_vector|.
// - filter_state : Current state (higher part) of the filter.
// - filter_state_length : Length (in samples) of |filter_state|.
// - filter_state_low : Current state (lower part) of the filter.
// - filter_state_low_length : Length (in samples) of |filter_state_low|.
// - out_vector_low_length : Maximum length (in samples) of
// |out_vector_low|.
//
// Output:
// - filter_state : Updated state (upper part) vector.
// - filter_state_low : Updated state (lower part) vector.
// - out_vector : Vector containing the upper part of the
// filtered values.
// - out_vector_low : Vector containing the lower part of the
// filtered values.
//
// Return value : Number of samples in the |out_vector|.
//
//
// WebRtcSpl_ComplexIFFT(...)
//
// Complex Inverse FFT
//
// Computes an inverse complex 2^|stages|-point FFT on the input vector, which
// is in bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With X as the input complex vector, y as the output complex
// vector and with M = 2^|stages|, the following is computed:
//
// M-1
// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages|
// real elements interleaved with 2^|stages| imaginary
// elements.
// [ReImReImReIm....]
// The elements are in Q(-scale) domain, see more on Return
// Value below.
//
// - stages : Number of FFT stages. Must be at least 3 and at most 10,
// since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// - mode : This parameter lets the user choose how the FFT
// should work.
// mode==0: Low-complexity and Low-accuracy mode
// mode==1: High-complexity and High-accuracy mode
//
// Output:
// - vector : Out pointer to the FFT vector (the same as input).
//
// Return Value : The scale value that tells the number of left bit shifts
// that the elements in the |vector| should be shifted with
// in order to get Q0 values, i.e. the physically correct
// values. The scale parameter is always 0 or positive,
// except if N>1024 (|stages|>10), which returns a scale
// value of -1, indicating error.
//
//
// WebRtcSpl_ComplexFFT(...)
//
// Complex FFT
//
// Computes a complex 2^|stages|-point FFT on the input vector, which is in
// bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With x as the input complex vector, Y as the output complex
// vector and with M = 2^|stages|, the following is computed:
//
// M-1
// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// This routine prevents overflow by scaling by 2 before each FFT stage. This is
// a fixed scaling, for proper normalization - there will be log2(n) passes, so
// this results in an overall factor of 1/n, distributed to maximize arithmetic
// accuracy.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary elements.
// [ReImReImReIm....]
// The output is in the Q0 domain.
//
// - stages : Number of FFT stages. Must be at least 3 and at most 10,
// since the table WebRtcSpl_kSinTable1024[] is 1024
// elements long.
//
// - mode : This parameter lets the user choose how the FFT
// should work.
// mode==0: Low-complexity and Low-accuracy mode
// mode==1: High-complexity and High-accuracy mode
//
// Output:
// - vector : The output FFT vector is in the Q0 domain.
//
// Return value : The scale parameter is always 0, except if N>1024,
// which returns a scale value of -1, indicating error.
//
//
// WebRtcSpl_AnalysisQMF(...)
//
// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The
// current version has F = 8000, therefore, a super-wideband audio signal is
// split to lower-band 0-8 kHz and upper-band 8-16 kHz.
//
// Input:
// - in_data : Wide band speech signal, 320 samples (10 ms)
//
// Input & Output:
// - filter_state1 : Filter state for first All-pass filter
// - filter_state2 : Filter state for second All-pass filter
//
// Output:
// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms)
// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency
// domain), 160 samples (10 ms)
//
//
// WebRtcSpl_SynthesisQMF(...)
//
// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F
// Hz, (current version has F = 8000 Hz). So the filter combines lower-band
// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16
// kHz audio.
//
// Input:
// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms)
// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms)
//
// Input & Output:
// - filter_state1 : Filter state for first All-pass filter
// - filter_state2 : Filter state for second All-pass filter
//
// Output:
// - out_data : Super-wideband speech signal, 0-16 kHz
//
// int16_t WebRtcSpl_SatW32ToW16(...)
//
// This function saturates a 32-bit word into a 16-bit word.
//
// Input:
// - value32 : The value of a 32-bit word.
//
// Output:
// - out16 : the saturated 16-bit word.
//
// int32_t WebRtc_MulAccumW16(...)
//
// This function multiplies a 16-bit word by a 16-bit word, and accumulates
// the product into a 32-bit integer.
//
// Input:
// - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word.
// - c : The value of a 32-bit integer.
//
// Return Value: The value of a * b + c.
//
cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/spl_inl.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include "webrtc/rtc_base/compile_assert_c.h"
// 64-entry lookup table indexed by the 6-bit hash computed in
// WebRtcSpl_CountLeadingZeros32_NotBuiltin(); only declared here.
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
// Don't call this directly except in tests!
// Portable count-leading-zeros for a 32-bit value, done with a table lookup.
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  int shift;
  uint32_t slot;

  // Smear the highest set bit into every lower position (shifts 1, 2, 4, 8,
  // 16). The result is a run of 0 bits followed by a run of 1 bits with the
  // same number of leading zeros as the original n; only 33 such values exist.
  for (shift = 1; shift <= 16; shift *= 2) {
    n |= n >> shift;
  }

  // Multiplying by this constant (found by exhaustive search) gives each of
  // the 33 candidates a distinct pattern in its top 6 bits, which then serves
  // as the table index.
  slot = (n * 0x8c0b2891) >> 26;
  return kWebRtcSpl_CountLeadingZeros32_Table[slot];
}
// Don't call this directly except in tests!
// Portable count-leading-zeros for a 64-bit value, built on the 32-bit
// fallback.
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
  const uint32_t upper_half = (uint32_t)(n >> 32);

  if (upper_half != 0) {
    // The first set bit lies in the upper word, so only that word matters.
    return WebRtcSpl_CountLeadingZeros32_NotBuiltin(upper_half);
  }
  // The upper word is all zeros: 32 leading zeros plus those of the lower word.
  return 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
}
// Returns the number of leading zero bits in the argument (32 for n == 0).
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
  // __builtin_clz() has an undefined result for 0, so answer that case here.
  if (n == 0) {
    return 32;
  }
  return __builtin_clz(n);
#else
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
// Returns the number of leading zero bits in the argument (64 for n == 0).
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
  // __builtin_clzll() has an undefined result for 0, so answer that case here.
  if (n == 0) {
    return 64;
  }
  return __builtin_clzll(n);
#else
  return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
#ifdef WEBRTC_ARCH_ARM_V7
#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
#else
#if defined(MIPS32_LE)
#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
#endif
#if !defined(MIPS_DSP_R1_LE)
// Saturates a 32-bit value to the int16_t range [-32768, 32767].
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  // In range, so the narrowing conversion keeps the value.
  return (int16_t)value32;
}
// Returns a + b, saturated to [INT32_MIN, INT32_MAX].
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
  const int a_negative = a < 0;
  const int b_negative = b < 0;
  // Add as unsigned numbers: signed overflow would be undefined behavior.
  const int32_t wrapped = (int32_t)((uint32_t)a + (uint32_t)b);

  // Operands of opposite sign can never overflow.
  if (a_negative != b_negative) {
    return wrapped;
  }
  // With equal signs the sum keeps that sign iff no overflow happened.
  if ((wrapped < 0) == a_negative) {
    return wrapped;
  }
  // Overflow: clamp towards the side the operands were on.
  return a_negative ? INT32_MIN : INT32_MAX;
}
// Returns a - b, saturated to [INT32_MIN, INT32_MAX].
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
  const int a_negative = a < 0;
  const int b_negative = b < 0;
  // Subtract as unsigned numbers: signed overflow would be undefined behavior.
  const int32_t wrapped = (int32_t)((uint32_t)a - (uint32_t)b);

  // Operands of equal sign can never overflow.
  if (a_negative == b_negative) {
    return wrapped;
  }
  // With different signs the difference keeps a's sign iff no overflow
  // happened.
  if ((wrapped < 0) == a_negative) {
    return wrapped;
  }
  // Overflow: clamp towards the side a was on.
  return a_negative ? INT32_MIN : INT32_MAX;
}
// Returns a + b, saturated to the int16_t range.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  // The sum of two int16_t values always fits in 32 bits: widen, then clamp.
  const int32_t wide_sum = (int32_t)a + (int32_t)b;
  return WebRtcSpl_SatW32ToW16(wide_sum);
}
// Returns var1 - var2, saturated to the int16_t range.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  // The difference of two int16_t values always fits in 32 bits: widen, then
  // clamp.
  const int32_t wide_diff = (int32_t)var1 - (int32_t)var2;
  return WebRtcSpl_SatW32ToW16(wide_diff);
}
#endif // #if !defined(MIPS_DSP_R1_LE)
#if !defined(MIPS32_LE)
// Returns the number of bits needed to represent n, i.e. 32 minus its count
// of leading zero bits.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  const int leading_zeros = WebRtcSpl_CountLeadingZeros32(n);
  return (int16_t)(32 - leading_zeros);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  uint32_t bits;

  if (a == 0) {
    return 0;
  }
  // For a negative value count the redundant leading 1 bits instead, by
  // inverting the value first.
  bits = (uint32_t)(a < 0 ? ~a : a);
  // One leading bit is the sign bit and must stay in place.
  return (int16_t)(WebRtcSpl_CountLeadingZeros32(bits) - 1);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  if (a == 0) {
    return 0;
  }
  // There is no sign bit, so every leading zero is available headroom.
  return (int16_t)WebRtcSpl_CountLeadingZeros32(a);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  // Widen first so that the bit inversion below is done on 32 bits.
  const int32_t widened = a;

  if (a == 0) {
    return 0;
  }
  // 17 = the 16 extra bits introduced by widening + 1 for the sign bit.
  if (a < 0) {
    return WebRtcSpl_CountLeadingZeros32(~widened) - 17;
  }
  return WebRtcSpl_CountLeadingZeros32(widened) - 17;
}
// Multiply-accumulate: returns a * b + c. The product of two int16_t values
// always fits in 32 bits; the final addition is not saturated.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  const int product = a * b;
  return product + c;
}
#endif // #if !defined(MIPS32_LE)
#endif // WEBRTC_ARCH_ARM_V7
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
cpp_onnx/third_party/webrtc/common_audio/signal_processing/min_max_operations.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16C()
* WebRtcSpl_MaxAbsValueW32C()
* WebRtcSpl_MaxValueW16C()
* WebRtcSpl_MaxValueW32C()
* WebRtcSpl_MinValueW16C()
* WebRtcSpl_MinValueW32C()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinIndexW32()
*
*/
#include <stdlib.h>
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
// TODO(kma): Move the next six functions into min_max_operations_c.c.
// Maximum absolute value of word16 vector. C version for generic platforms.
//
// Returns the largest |vector[i]| for i in [0, length), clamped to
// WEBRTC_SPL_WORD16_MAX so that |-32768| still fits in the int16_t result.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
  // Plain int is wide enough to hold abs(-32768).
  int peak = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int magnitude = abs((int)vector[pos]);
    peak = (magnitude > peak) ? magnitude : peak;
  }

  // Guard the case for abs(-32768).
  if (peak > WEBRTC_SPL_WORD16_MAX) {
    return (int16_t)WEBRTC_SPL_WORD16_MAX;
  }
  return (int16_t)peak;
}
// Maximum absolute value of word32 vector. C version for generic platforms.
//
// Returns the largest |vector[i]| for i in [0, length), clamped to
// WEBRTC_SPL_WORD32_MAX so that |INT32_MIN| still fits in the int32_t result.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
  // Use uint32_t for the local variables, to accommodate the magnitude of
  // 0x80000000 (INT32_MIN), which is 0x80000000.
  uint32_t absolute = 0, maximum = 0;
  size_t i = 0;

  RTC_DCHECK_GT(length, 0);

  for (i = 0; i < length; i++) {
    const int32_t value = vector[i];
    // abs(INT32_MIN) is undefined behavior because the result is not
    // representable as int. Negate in unsigned arithmetic instead, which is
    // well defined and yields 0x80000000 for INT32_MIN.
    absolute = (value < 0) ? (uint32_t)(0u - (uint32_t)value)
                           : (uint32_t)value;
    if (absolute > maximum) {
      maximum = absolute;
    }
  }

  // Clamp 0x80000000 down to the largest representable int32_t.
  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);

  return (int32_t)maximum;
}
// Maximum value of word16 vector. C version for generic platforms.
//
// Returns the largest element of vector[0..length-1]. `length` is expected
// to be > 0 (checked with RTC_DCHECK_GT).
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
  // Start from the smallest possible value so any element can replace it.
  int16_t best = WEBRTC_SPL_WORD16_MIN;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int16_t sample = vector[pos];
    if (sample > best) {
      best = sample;
    }
  }
  return best;
}
// Maximum value of word32 vector. C version for generic platforms.
//
// Returns the largest element of vector[0..length-1]. `length` is expected
// to be > 0 (checked with RTC_DCHECK_GT).
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
  // Start from the smallest possible value so any element can replace it.
  int32_t best = WEBRTC_SPL_WORD32_MIN;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int32_t sample = vector[pos];
    if (sample > best) {
      best = sample;
    }
  }
  return best;
}
// Minimum value of word16 vector. C version for generic platforms.
//
// Returns the smallest element of vector[0..length-1]. `length` is expected
// to be > 0 (checked with RTC_DCHECK_GT).
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
  // Start from the largest possible value so any element can replace it.
  int16_t best = WEBRTC_SPL_WORD16_MAX;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int16_t sample = vector[pos];
    if (sample < best) {
      best = sample;
    }
  }
  return best;
}
// Minimum value of word32 vector. C version for generic platforms.
//
// Returns the smallest element of vector[0..length-1]. `length` is expected
// to be > 0 (checked with RTC_DCHECK_GT).
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
  // Start from the largest possible value so any element can replace it.
  int32_t best = WEBRTC_SPL_WORD32_MAX;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int32_t sample = vector[pos];
    if (sample < best) {
      best = sample;
    }
  }
  return best;
}
// Index of maximum absolute value in a word16 vector.
//
// Returns the index of the first element whose absolute value is the largest
// in vector[0..length-1]; 0 if every element is zero. `length` is expected
// to be > 0 (checked with RTC_DCHECK_GT).
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
  // Type int is used for the magnitudes to accommodate abs(-32768).
  int peak = 0;
  size_t peak_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int magnitude = abs((int)vector[pos]);
    // Strict comparison keeps the earliest index among equal magnitudes.
    if (magnitude > peak) {
      peak = magnitude;
      peak_pos = pos;
    }
  }
  return peak_pos;
}
// Index of maximum value in a word16 vector.
//
// Returns the index of the first occurrence of the largest element in
// vector[0..length-1]; 0 if no element exceeds WEBRTC_SPL_WORD16_MIN.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
  int16_t best = WEBRTC_SPL_WORD16_MIN;
  size_t best_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int16_t sample = vector[pos];
    // Strict comparison keeps the earliest index among equal values.
    if (sample > best) {
      best = sample;
      best_pos = pos;
    }
  }
  return best_pos;
}
// Index of maximum value in a word32 vector.
//
// Returns the index of the first occurrence of the largest element in
// vector[0..length-1]; 0 if no element exceeds WEBRTC_SPL_WORD32_MIN.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
  int32_t best = WEBRTC_SPL_WORD32_MIN;
  size_t best_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int32_t sample = vector[pos];
    // Strict comparison keeps the earliest index among equal values.
    if (sample > best) {
      best = sample;
      best_pos = pos;
    }
  }
  return best_pos;
}
// Index of minimum value in a word16 vector.
//
// Returns the index of the first occurrence of the smallest element in
// vector[0..length-1]; 0 if no element is below WEBRTC_SPL_WORD16_MAX.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
  int16_t best = WEBRTC_SPL_WORD16_MAX;
  size_t best_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int16_t sample = vector[pos];
    // Strict comparison keeps the earliest index among equal values.
    if (sample < best) {
      best = sample;
      best_pos = pos;
    }
  }
  return best_pos;
}
// Index of minimum value in a word32 vector.
//
// Returns the index of the first occurrence of the smallest element in
// vector[0..length-1]; 0 if no element is below WEBRTC_SPL_WORD32_MAX.
// `length` is expected to be > 0 (checked with RTC_DCHECK_GT).
size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
  int32_t best = WEBRTC_SPL_WORD32_MAX;
  size_t best_pos = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos != length; ++pos) {
    const int32_t sample = vector[pos];
    // Strict comparison keeps the earliest index among equal values.
    if (sample < best) {
      best = sample;
      best_pos = pos;
    }
  }
  return best_pos;
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_48khz.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains resampling functions between 48 kHz and nb/wb.
* The description header can be found in signal_processing_library.h
*
*/
#include <string.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
////////////////////////////
///// 48 kHz -> 16 kHz /////
////////////////////////////
// 48 -> 16 resampler
//
// Converts one block of 480 input samples into 160 output samples in three
// stages, using `tmpmem` as scratch space:
//   1. low-pass at 48 kHz:   in[480]           -> tmpmem[16..495]
//   2. 48 kHz -> 32 kHz:     tmpmem[8..]       -> tmpmem[0..319]
//   3. 32 kHz -> 16 kHz:     tmpmem[0..319]    -> out[160]
//
// in:     480 int16_t input samples.
// out:    receives 160 int16_t output samples.
// state:  filter memories carried from one call to the next.
// tmpmem: int32_t scratch buffer; indices up to 495 are accessed here, so it
//         must hold at least 496 elements.
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State48khzTo16khz* state,
                                    int32_t* tmpmem) {
  ///// 48 --> 48(LP) /////
  // int16_t  in[480]
  // int32_t out[480]
  /////
  WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);

  ///// 48 --> 32 /////
  // int32_t  in[480]
  // int32_t out[320]
  /////
  // copy state to and from input array:
  // the 8 samples saved by the previous call are placed directly in front of
  // the new data (tmpmem[8..15]), and the last 8 samples of the new data
  // (tmpmem[488..495]) are saved for the next call.
  memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
  memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
  // 160 blocks; the result is written to the start of tmpmem, below the
  // region that is still being read.
  WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);

  ///// 32 --> 16 /////
  // int32_t  in[320]
  // int16_t out[160]
  /////
  WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
}
// initialize state of 48 -> 16 resampler
//
// Zeroes every filter memory of `state`: 16 words for the 48 kHz low-pass
// stage and 8 words for each of the other two stages.
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) {
  const size_t low_pass_bytes = 16 * sizeof(int32_t);
  const size_t stage_bytes = 8 * sizeof(int32_t);

  memset(state->S_48_48, 0, low_pass_bytes);
  memset(state->S_48_32, 0, stage_bytes);
  memset(state->S_32_16, 0, stage_bytes);
}
////////////////////////////
///// 16 kHz -> 48 kHz /////
////////////////////////////
// 16 -> 48 resampler
//
// Converts one block of 160 input samples into 480 output samples in three
// stages, using `tmpmem` as scratch space:
//   1. 16 kHz -> 32 kHz:     in[160]           -> tmpmem[16..335]
//   2. 32 kHz -> 24 kHz:     tmpmem[8..]       -> tmpmem[0..239]
//   3. 24 kHz -> 48 kHz:     tmpmem[0..239]    -> out[480]
//
// in:     160 int16_t input samples.
// out:    receives 480 int16_t output samples.
// state:  filter memories carried from one call to the next.
// tmpmem: int32_t scratch buffer; indices up to 335 are accessed here, so it
//         must hold at least 336 elements.
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State16khzTo48khz* state,
                                    int32_t* tmpmem) {
  ///// 16 --> 32 /////
  // int16_t  in[160]
  // int32_t out[320]
  /////
  WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);

  ///// 32 --> 24 /////
  // int32_t  in[320]
  // int32_t out[240]
  /////
  // copy state to and from input array:
  // the 8 samples saved by the previous call are placed directly in front of
  // the new data (tmpmem[8..15]), and the last 8 samples of the new data
  // (tmpmem[328..335]) are saved for the next call.
  memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
  memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
  // 80 blocks; the result is written to the start of tmpmem.
  WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);

  ///// 24 --> 48 /////
  // int32_t  in[240]
  // int16_t out[480]
  /////
  WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// initialize state of 16 -> 48 resampler
//
// Zeroes the three 8-word filter memories of `state`.
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) {
  const size_t stage_bytes = 8 * sizeof(int32_t);

  memset(state->S_16_32, 0, stage_bytes);
  memset(state->S_32_24, 0, stage_bytes);
  memset(state->S_24_48, 0, stage_bytes);
}
////////////////////////////
///// 48 kHz -> 8 kHz /////
////////////////////////////
// 48 -> 8 resampler
//
// Converts one block of 480 input samples into 80 output samples in four
// stages, using `tmpmem` as scratch space:
//   1. 48 kHz -> 24 kHz:     in[480]            -> tmpmem[256..495]
//   2. low-pass at 24 kHz:   tmpmem[256..495]   -> tmpmem[16..255]
//   3. 24 kHz -> 16 kHz:     tmpmem[8..]        -> tmpmem[0..159]
//   4. 16 kHz -> 8 kHz:      tmpmem[0..159]     -> out[80]
//
// in:     480 int16_t input samples.
// out:    receives 80 int16_t output samples.
// state:  filter memories carried from one call to the next.
// tmpmem: int32_t scratch buffer; indices up to 495 are accessed here, so it
//         must hold at least 496 elements.
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State48khzTo8khz* state,
                                   int32_t* tmpmem) {
  ///// 48 --> 24 /////
  // int16_t  in[480]
  // int32_t out[240]
  /////
  WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);

  ///// 24 --> 24(LP) /////
  // int32_t  in[240]
  // int32_t out[240]
  /////
  WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);

  ///// 24 --> 16 /////
  // int32_t  in[240]
  // int32_t out[160]
  /////
  // copy state to and from input array:
  // the 8 samples saved by the previous call are placed directly in front of
  // the new data (tmpmem[8..15]), and the last 8 samples of the new data
  // (tmpmem[248..255]) are saved for the next call.
  memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
  memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
  // The 3:2 resampler is reused here for 24 kHz -> 16 kHz (80 blocks).
  WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);

  ///// 16 --> 8 /////
  // int32_t  in[160]
  // int16_t out[80]
  /////
  WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}
// initialize state of 48 -> 8 resampler
//
// Zeroes every filter memory of `state`: 16 words for the 24 kHz low-pass
// stage and 8 words for each of the other three stages.
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) {
  const size_t low_pass_bytes = 16 * sizeof(int32_t);
  const size_t stage_bytes = 8 * sizeof(int32_t);

  memset(state->S_48_24, 0, stage_bytes);
  memset(state->S_24_24, 0, low_pass_bytes);
  memset(state->S_24_16, 0, stage_bytes);
  memset(state->S_16_8, 0, stage_bytes);
}
////////////////////////////
///// 8 kHz -> 48 kHz /////
////////////////////////////
// 8 -> 48 resampler
//
// Converts one block of 80 input samples into 480 output samples in four
// stages, using `tmpmem` as scratch space:
//   1. 8 kHz  -> 16 kHz:     in[80]             -> tmpmem[264..423]
//   2. 16 kHz -> 12 kHz:     tmpmem[256..]      -> tmpmem[240..359]
//   3. 12 kHz -> 24 kHz:     tmpmem[240..359]   -> tmpmem[0..239]
//   4. 24 kHz -> 48 kHz:     tmpmem[0..239]     -> out[480]
//
// in:     80 int16_t input samples.
// out:    receives 480 int16_t output samples.
// state:  filter memories carried from one call to the next.
// tmpmem: int32_t scratch buffer; indices up to 423 are accessed here, so it
//         must hold at least 424 elements.
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State8khzTo48khz* state,
                                   int32_t* tmpmem) {
  ///// 8 --> 16 /////
  // int16_t  in[80]
  // int32_t out[160]
  /////
  WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);

  ///// 16 --> 12 /////
  // int32_t  in[160]
  // int32_t out[120]
  /////
  // copy state to and from input array:
  // the 8 samples saved by the previous call are placed directly in front of
  // the new data (tmpmem[256..263]), and the last 8 samples of the new data
  // (tmpmem[416..423]) are saved for the next call.
  memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
  memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
  // The 4:3 resampler is reused here for 16 kHz -> 12 kHz (40 blocks).
  WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);

  ///// 12 --> 24 /////
  // int32_t  in[120]
  // int32_t out[240]
  /////
  WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);

  ///// 24 --> 48 /////
  // int32_t  in[240]
  // int16_t out[480]
  /////
  WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// initialize state of 8 -> 48 resampler
//
// Zeroes the four 8-word filter memories of `state`.
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) {
  const size_t stage_bytes = 8 * sizeof(int32_t);

  memset(state->S_8_16, 0, stage_bytes);
  memset(state->S_16_12, 0, stage_bytes);
  memset(state->S_12_24, 0, stage_bytes);
  memset(state->S_24_48, 0, stage_bytes);
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of some internal resampling functions.
*
*/
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
#include "webrtc/rtc_base/sanitizer.h"
// allpass filter coefficients.
// Each row holds the three coefficients of one allpass branch, applied in
// order to the branch's three cascaded sections. In this file row 0 is used
// by the branches commented as "upper allpass" and row 1 by those commented
// as "lower allpass".
static const int16_t kResampleAllpass[2][3] = {
    {821, 6110, 12382},
    {3050, 9368, 15063}
};
//
// decimator
// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
// output: int16_t (saturated) (of length len/2)
// state:  filter state array; length = 8
//
// The even input samples go through the "lower" allpass branch
// (state[0..3], kResampleAllpass[1]) and the odd ones through the "upper"
// branch (state[4..7], kResampleAllpass[0]). Each branch writes its halved
// output back over the input sample it consumed; a final pass adds the two
// branch outputs pairwise, shifts right by 15 and saturates to int16_t.
//
// Signed overflow in the intermediate arithmetic is tolerated, hence the
// sanitizer suppression on the function.
void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2IntToShort(int32_t* in, int32_t len, int16_t* out,
                            int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // From here on len is the number of output samples.
  len >>= 1;

  // lower allpass filter (operates on even input samples)
  for (i = 0; i < len; i++) {
    tmp0 = in[i << 1];
    diff = tmp0 - state[1];
    // UBSan: -1771017321 - 999586185 cannot be represented in type 'int'

    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // divide by two and store temporarily (overwrites the consumed input)
    in[i << 1] = (state[3] >> 1);
  }

  // Step to the odd input samples.
  in++;

  // upper allpass filter (operates on odd input samples)
  for (i = 0; i < len; i++) {
    tmp0 = in[i << 1];
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // divide by two and store temporarily (overwrites the consumed input)
    in[i << 1] = (state[7] >> 1);
  }

  // Back to the start of the buffer for the combining pass.
  in--;

  // combine allpass outputs
  // NOTE(review): two output samples are produced per iteration, so when the
  // halved len is odd the last iteration reads in[] and writes out[] one pair
  // beyond len. The callers in resample_48khz.c pass len = 320 and 160, which
  // are unaffected.
  for (i = 0; i < len; i += 2) {
    // divide by two, add both allpass outputs and round
    tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
    tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
    // saturate to the int16_t range
    if (tmp0 > (int32_t)0x00007FFF)
      tmp0 = 0x00007FFF;
    if (tmp0 < (int32_t)0xFFFF8000)
      tmp0 = 0xFFFF8000;
    out[i] = (int16_t)tmp0;
    if (tmp1 > (int32_t)0x00007FFF)
      tmp1 = 0x00007FFF;
    if (tmp1 < (int32_t)0xFFFF8000)
      tmp1 = 0xFFFF8000;
    out[i + 1] = (int16_t)tmp1;
  }
}
//
// decimator
// input:  int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state:  filter state array; length = 8
//
// Each input sample is first converted to the internal format
// (value << 15, plus 1 << 14). The even input samples go through the "lower"
// allpass branch (state[0..3], kResampleAllpass[1]) and the odd ones through
// the "upper" branch (state[4..7], kResampleAllpass[0]); out[i] is the sum of
// the two halved branch outputs.
//
// Signed overflow in the intermediate arithmetic is tolerated, hence the
// sanitizer suppression on the function.
void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                            int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // From here on len is the number of output samples.
  len >>= 1;

  // lower allpass filter (operates on even input samples)
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // UBSan: -1379909682 - 834099714 cannot be represented in type 'int'

    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // divide by two and store temporarily
    out[i] = (state[3] >> 1);
  }

  // Step to the odd input samples.
  in++;

  // upper allpass filter (operates on odd input samples)
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // divide by two and add to the lower-branch value stored above
    out[i] += (state[7] >> 1);
  }

  // Restores the local pointer only; it is not used afterwards.
  in--;
}
//
// interpolator
// input:  int16_t
// output: int32_t (normalized, not saturated) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is fed to both allpass branches. The "upper" branch
// (state[4..7], kResampleAllpass[0]) produces out[0], out[2], ... and the
// "lower" branch (state[0..3], kResampleAllpass[1]) produces out[1], out[3],
// ... Each input sample is first converted to the internal format
// (value << 15, plus 1 << 14); branch outputs are shifted right by 15 before
// being stored.
void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // upper allpass filter (generates odd output samples, counting from 1:
  // the 1st, 3rd, ... i.e. out[0], out[2], ...)
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // scale down and store
    out[i << 1] = state[7] >> 15;
  }

  // Step to the interleaved positions written by the lower branch.
  out++;

  // lower allpass filter (generates even output samples, counting from 1:
  // the 2nd, 4th, ... i.e. the original out[1], out[3], ...)
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // scale down and store
    out[i << 1] = state[3] >> 15;
  }
}
//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is fed to both allpass branches. The "upper" branch
// (state[4..7], kResampleAllpass[0]) produces out[0], out[2], ... and the
// "lower" branch (state[0..3], kResampleAllpass[1]) produces out[1], out[3],
// ... Branch outputs are stored without any further scaling.
void WebRtcSpl_UpBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                             int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // upper allpass filter (generates odd output samples, counting from 1:
  // the 1st, 3rd, ... i.e. out[0], out[2], ...)
  for (i = 0; i < len; i++) {
    tmp0 = in[i];
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // store as is (the output keeps the shifted internal format)
    out[i << 1] = state[7];
  }

  // Step to the interleaved positions written by the lower branch.
  out++;

  // lower allpass filter (generates even output samples, counting from 1:
  // the 2nd, 4th, ... i.e. the original out[1], out[3], ...)
  for (i = 0; i < len; i++) {
    tmp0 = in[i];
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // store as is (the output keeps the shifted internal format)
    out[i << 1] = state[3];
  }
}
//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int16_t (saturated) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is fed to both allpass branches. The "upper" branch
// (state[4..7], kResampleAllpass[0]) produces out[0], out[2], ... and the
// "lower" branch (state[0..3], kResampleAllpass[1]) produces out[1], out[3],
// ... Branch outputs are shifted right by 15 and saturated to the int16_t
// range before being stored.
void WebRtcSpl_UpBy2IntToShort(const int32_t* in, int32_t len, int16_t* out,
                               int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // upper allpass filter (generates odd output samples, counting from 1:
  // the 1st, 3rd, ... i.e. out[0], out[2], ...)
  for (i = 0; i < len; i++) {
    tmp0 = in[i];
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // scale down, saturate and store
    tmp1 = state[7] >> 15;
    if (tmp1 > (int32_t)0x00007FFF)
      tmp1 = 0x00007FFF;
    if (tmp1 < (int32_t)0xFFFF8000)
      tmp1 = 0xFFFF8000;
    out[i << 1] = (int16_t)tmp1;
  }

  // Step to the interleaved positions written by the lower branch.
  out++;

  // lower allpass filter (generates even output samples, counting from 1:
  // the 2nd, 4th, ... i.e. the original out[1], out[3], ...)
  for (i = 0; i < len; i++) {
    tmp0 = in[i];
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // scale down, saturate and store
    tmp1 = state[3] >> 15;
    if (tmp1 > (int32_t)0x00007FFF)
      tmp1 = 0x00007FFF;
    if (tmp1 < (int32_t)0xFFFF8000)
      tmp1 = 0xFFFF8000;
    out[i << 1] = (int16_t)tmp1;
  }
}
// lowpass filter
// input:  int16_t
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 16 (state[0..15] are accessed below)
//
// Produces `len` output samples from `len` input samples without changing
// the sample rate. Four allpass passes are run, each using its own group of
// four state words:
//   state[0..3]   lower allpass, odd input (delayed by one sample pair)
//                 -> even output
//   state[4..7]   upper allpass, even input -> even output
//   state[8..11]  lower allpass, even input -> odd output
//   state[12..15] upper allpass, odd input  -> odd output
// state[12] also serves as the delay element of the first pass: it holds the
// last odd input sample (in internal format) left there by the previous call.
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // From here on len is the number of even/odd sample pairs.
  len >>= 1;

  // lower allpass filter: odd input -> even output samples
  in++;
  // initial state of polyphase delay element
  tmp0 = state[12];
  for (i = 0; i < len; i++) {
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // divide by two and store temporarily
    out[i << 1] = state[3] >> 1;
    // Fetch the odd input sample that the NEXT iteration will filter.
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
  }
  in--;

  // upper allpass filter: even input -> even output samples
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    diff = tmp0 - state[5];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // average the two allpass outputs, scale down and store
    out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
  }

  // switch to odd output samples
  out++;

  // lower allpass filter: even input -> odd output samples
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    diff = tmp0 - state[9];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[8] + diff * kResampleAllpass[1][0];
    state[8] = tmp0;
    diff = tmp1 - state[10];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[9] + diff * kResampleAllpass[1][1];
    state[9] = tmp1;
    diff = tmp0 - state[11];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[11] = state[10] + diff * kResampleAllpass[1][2];
    state[10] = tmp0;

    // divide by two and store temporarily
    out[i << 1] = state[11] >> 1;
  }

  // upper allpass filter: odd input -> odd output samples
  in++;
  for (i = 0; i < len; i++) {
    tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    diff = tmp0 - state[13];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[12] + diff * kResampleAllpass[0][0];
    // After the last iteration state[12] holds the last odd input sample,
    // which the next call uses as its initial delay element.
    state[12] = tmp0;
    diff = tmp1 - state[14];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[13] + diff * kResampleAllpass[0][1];
    state[13] = tmp1;
    diff = tmp0 - state[15];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[15] = state[14] + diff * kResampleAllpass[0][2];
    state[14] = tmp0;

    // average the two allpass outputs, scale down and store
    out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
  }
}
// lowpass filter
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 16 (state[0..15] are accessed below)
//
// Produces `len` output samples from `len` input samples without changing
// the sample rate. Four allpass passes are run, each using its own group of
// four state words:
//   state[0..3]   lower allpass, odd input (delayed by one sample pair)
//                 -> even output
//   state[4..7]   upper allpass, even input -> even output
//   state[8..11]  lower allpass, even input -> odd output
//   state[12..15] upper allpass, odd input  -> odd output
// state[12] also serves as the delay element of the first pass: it holds the
// last odd input sample left there by the previous call.
//
// Signed overflow in the intermediate arithmetic is tolerated, hence the
// sanitizer suppression on the function.
void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                        int32_t* state) {
  int32_t tmp0, tmp1, diff;
  int32_t i;

  // From here on len is the number of even/odd sample pairs.
  len >>= 1;

  // lower allpass filter: odd input -> even output samples
  in++;
  // initial state of polyphase delay element
  tmp0 = state[12];
  for (i = 0; i < len; i++) {
    diff = tmp0 - state[1];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[0] + diff * kResampleAllpass[1][0];
    state[0] = tmp0;
    diff = tmp1 - state[2];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[1] + diff * kResampleAllpass[1][1];
    state[1] = tmp1;
    diff = tmp0 - state[3];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[3] = state[2] + diff * kResampleAllpass[1][2];
    state[2] = tmp0;

    // divide by two and store temporarily
    out[i << 1] = state[3] >> 1;
    // Fetch the odd input sample that the NEXT iteration will filter.
    tmp0 = in[i << 1];
  }
  in--;

  // upper allpass filter: even input -> even output samples
  for (i = 0; i < len; i++) {
    tmp0 = in[i << 1];
    diff = tmp0 - state[5];
    // UBSan: -794814117 - 1566149201 cannot be represented in type 'int'

    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[4] + diff * kResampleAllpass[0][0];
    state[4] = tmp0;
    diff = tmp1 - state[6];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[5] + diff * kResampleAllpass[0][1];
    state[5] = tmp1;
    diff = tmp0 - state[7];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[7] = state[6] + diff * kResampleAllpass[0][2];
    state[6] = tmp0;

    // average the two allpass outputs, scale down and store
    out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
  }

  // switch to odd output samples
  out++;

  // lower allpass filter: even input -> odd output samples
  for (i = 0; i < len; i++) {
    tmp0 = in[i << 1];
    diff = tmp0 - state[9];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[8] + diff * kResampleAllpass[1][0];
    state[8] = tmp0;
    diff = tmp1 - state[10];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[9] + diff * kResampleAllpass[1][1];
    state[9] = tmp1;
    diff = tmp0 - state[11];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[11] = state[10] + diff * kResampleAllpass[1][2];
    state[10] = tmp0;

    // divide by two and store temporarily
    out[i << 1] = state[11] >> 1;
  }

  // upper allpass filter: odd input -> odd output samples
  in++;
  for (i = 0; i < len; i++) {
    tmp0 = in[i << 1];
    diff = tmp0 - state[13];
    // scale down and round
    diff = (diff + (1 << 13)) >> 14;
    tmp1 = state[12] + diff * kResampleAllpass[0][0];
    // After the last iteration state[12] holds the last odd input sample,
    // which the next call uses as its initial delay element.
    state[12] = tmp0;
    diff = tmp1 - state[14];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    tmp0 = state[13] + diff * kResampleAllpass[0][1];
    state[13] = tmp1;
    diff = tmp0 - state[15];
    // scale down and truncate
    diff = diff >> 14;
    if (diff < 0)
      diff += 1;
    state[15] = state[14] + diff * kResampleAllpass[0][2];
    state[14] = tmp0;

    // average the two allpass outputs, scale down and store
    out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
  }
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file contains some internal resampling functions.
*
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#include <stdint.h>
/*******************************************************************
* resample_by_2_internal.c
* Functions for internal use in the other resample functions
******************************************************************/
// Decimator by 2.
// in:    int32_t samples in the internal format (shifted 15 positions to the
//        left, + offset 16384); the buffer is OVERWRITTEN.
// len:   number of input samples.
// out:   len/2 saturated int16_t samples.
// state: filter state array of 8 words.
void WebRtcSpl_DownBy2IntToShort(int32_t* in, int32_t len, int16_t* out,
                                 int32_t* state);

// Decimator by 2.
// in:    int16_t samples.
// len:   number of input samples.
// out:   len/2 int32_t samples in the internal format.
// state: filter state array of 8 words.
void WebRtcSpl_DownBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                                 int32_t* state);

// Interpolator by 2.
// in:    int16_t samples.
// len:   number of input samples.
// out:   len*2 int32_t samples (normalized, not saturated).
// state: filter state array of 8 words.
void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state);

// Interpolator by 2.
// in:    int32_t samples in the internal format.
// len:   number of input samples.
// out:   len*2 int32_t samples in the internal format.
// state: filter state array of 8 words.
void WebRtcSpl_UpBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                             int32_t* state);

// Interpolator by 2.
// in:    int32_t samples in the internal format.
// len:   number of input samples.
// out:   len*2 saturated int16_t samples.
// state: filter state array of 8 words.
void WebRtcSpl_UpBy2IntToShort(const int32_t* in, int32_t len, int16_t* out,
                               int32_t* state);

// Lowpass filter (sample rate unchanged).
// in:    int16_t samples.
// len:   number of input samples.
// out:   len int32_t samples (normalized, not saturated).
// state: filter state array of 16 words.
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state);

// Lowpass filter (sample rate unchanged).
// in:    int32_t samples in the internal format.
// len:   number of input samples.
// out:   len int32_t samples (normalized, not saturated).
// state: filter state array of 16 words.
void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                             int32_t* state);
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_fractional.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions between 48, 44, 32 and 24 kHz.
* The description headers can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Interpolation coefficients for the 48 kHz -> 32 kHz resampler: one row of
// eight taps per output sample of a block. The second row holds the same taps
// as the first, in reverse order.
static const int16_t kCoefficients48To32[2][8] = {
    {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
    {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
};
// Interpolation coefficients for the 32 kHz -> 24 kHz resampler: one row of
// eight taps per output sample of a block. Row 2 is row 0 reversed and row 1
// is symmetric around its centre.
static const int16_t kCoefficients32To24[3][8] = {
    {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
    {386, -381, -2646, 19062, 19062, -2646, -381, 386},
    {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
};
// Interpolation coefficients for the 44 kHz -> 32 kHz resampler: four rows of
// nine taps. Rows 0..2 are handed to WebRtcSpl_ResampDotProduct(), row 3 is
// applied directly inside WebRtcSpl_Resample44khzTo32khz().
static const int16_t kCoefficients44To32[4][9] = {
    {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
    {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
    {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
    {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
};
// Resampling ratio: 2/3
// input:  int32_t (normalized, not saturated) :: 3 samples consumed per block
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
//      K: number of blocks
//
// Each block turns 3 input samples into 2 output samples. Output sample p of
// a block is 16384 + sum over the 8 taps of row p of kCoefficients48To32,
// applied to In[p] .. In[p + 7]. The filter therefore looks ahead: the last
// block reads up to In[8], i.e. 3 * K + 6 input samples must be readable.
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t block;

    for (block = 0; block < K; block++) {
        int phase;

        for (phase = 0; phase < 2; phase++) {
            int32_t acc = 1 << 14;  // rounding offset
            int tap;

            for (tap = 0; tap < 8; tap++) {
                acc += kCoefficients48To32[phase][tap] * In[phase + tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += 3;
        Out += 2;
    }
}
// Resampling ratio: 3/4
// input:  int32_t (normalized, not saturated) :: 4 samples consumed per block
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
//      K: number of blocks
//
// Each block turns 4 input samples into 3 output samples. Output sample p of
// a block is 16384 + sum over the 8 taps of row p of kCoefficients32To24,
// applied to In[p] .. In[p + 7]. The filter therefore looks ahead: the last
// block reads up to In[9], i.e. 4 * K + 6 input samples must be readable.
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t block;

    for (block = 0; block < K; block++) {
        int phase;

        for (phase = 0; phase < 3; phase++) {
            int32_t acc = 1 << 14;  // rounding offset
            int tap;

            for (tap = 0; tap < 8; tap++) {
                acc += kCoefficients32To24[phase][tap] * In[phase + tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += 4;
        Out += 3;
    }
}
//
// fractional resampling filters
// Fout = 11/16 * Fin
// Fout = 8/11 * Fin
//
// Computes two 9-tap inner products with the same coefficient set and stores
// them through |out1| and |out2|.
//
// - in1      [i] : read forwards,  in1[0] .. in1[8]
// - in2      [i] : read backwards, in2[0] .. in2[-8]
// - coef_ptr [i] : nine filter coefficients
// - out1     [o] : 16384 + sum(coef_ptr[k] * in1[k])
// - out2     [o] : 16384 + sum(coef_ptr[k] * in2[-k])
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
                                       const int16_t *coef_ptr, int32_t *out1,
                                       int32_t *out2)
{
    int32_t forward_sum = 16384;   // rounding offset
    int32_t backward_sum = 16384;  // rounding offset
    int16_t tap_coef;
    int tap;

    for (tap = 0; tap < 8; tap++) {
        tap_coef = coef_ptr[tap];
        forward_sum += tap_coef * in1[tap];
        backward_sum += tap_coef * in2[-tap];
    }

    // The ninth tap is folded directly into the stores.
    tap_coef = coef_ptr[8];
    *out1 = forward_sum + tap_coef * in1[8];
    *out2 = backward_sum + tap_coef * in2[-8];
}
// Resampling ratio: 8/11
// input:  int32_t (normalized, not saturated) :: 11 samples consumed per block
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
//      K: number of blocks
//
// Each block turns 11 input samples into 8 output samples. The filters look
// ahead: a block reads up to In[17], so 11 * K + 7 input samples must be
// readable.
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    size_t m;

    for (m = 0; m < K; m++) {
        int32_t tmp = 1 << 14;  // rounding offset
        int tap;

        // First output sample: In[3] scaled by 2^15 plus the offset. The shift
        // is done on the unsigned representation because left-shifting a
        // negative signed value is undefined behaviour in C.
        Out[0] = (int32_t)((uint32_t)In[3] << 15) + tmp;

        // Fifth output sample: row 3 of the coefficient table over In[5..13].
        for (tap = 0; tap < 9; tap++) {
            tmp += kCoefficients44To32[3][tap] * In[5 + tap];
        }
        Out[4] = tmp;

        // The remaining six samples come in mirrored pairs that share one
        // coefficient row each.
        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0],
                                   &Out[1], &Out[7]);
        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1],
                                   &Out[2], &Out[6]);
        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2],
                                   &Out[3], &Out[5]);

        // advance to the next block
        In += 11;
        Out += 8;
    }
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_init.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* The global function contained in this file initializes the SPL function
 * pointers. The implementation set (generic C, ARM Neon or MIPS) is selected
 * at compile time.
 *
 * Some code came from common/rtcd.c in the WebM project.
 */
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
/* Definitions of the SPL function pointers. They are assigned by
 * InitFunctionPointers(), which runs through WebRtcSpl_Init(). */

/* Min/max operations. */
MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
MaxValueW16 WebRtcSpl_MaxValueW16;
MaxValueW32 WebRtcSpl_MaxValueW32;
MinValueW16 WebRtcSpl_MinValueW16;
MinValueW32 WebRtcSpl_MinValueW32;

/* Vector operations. */
CrossCorrelation WebRtcSpl_CrossCorrelation;
DownsampleFast WebRtcSpl_DownsampleFast;
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
#if !defined(WEBRTC_HAS_NEON) && !defined(MIPS32_LE)
/* Binds every SPL function pointer to its generic C implementation. Only
 * compiled when neither the Neon nor the MIPS variant is selected. */
static void InitPointersToC(void)
{
    /* Min/max operations. */
    WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
    WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
    WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
    WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
    WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
    WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;

    /* Vector operations. */
    WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
    WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
    WebRtcSpl_ScaleAndAddVectorsWithRound =
        WebRtcSpl_ScaleAndAddVectorsWithRoundC;
}
#endif
#if defined(WEBRTC_HAS_NEON)
/* Binds the SPL function pointers to the Neon implementations. */
static void InitPointersToNeon(void)
{
    /* Min/max operations. */
    WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
    WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
    WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
    WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
    WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
    WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;

    /* Vector operations. */
    WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
    WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;

    /* This one is bound to the generic C routine, not a Neon one. */
    WebRtcSpl_ScaleAndAddVectorsWithRound =
        WebRtcSpl_ScaleAndAddVectorsWithRoundC;
}
#endif
#if defined(MIPS32_LE)
/* Binds the SPL function pointers to the MIPS implementations. Two of them
 * only get a MIPS routine when MIPS_DSP_R1_LE is defined and fall back to the
 * generic C routine otherwise. */
static void InitPointersToMIPS(void)
{
    /* Always bound to a MIPS routine. */
    WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
    WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
    WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
    WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
    WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
    WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
    WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;

#if defined(MIPS_DSP_R1_LE)
    WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
    WebRtcSpl_ScaleAndAddVectorsWithRound =
        WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
#else
    WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
    WebRtcSpl_ScaleAndAddVectorsWithRound =
        WebRtcSpl_ScaleAndAddVectorsWithRoundC;
#endif
}
#endif
/* Dispatches to the one InitPointersTo*() routine that was compiled in:
 * Neon if WEBRTC_HAS_NEON is defined, else MIPS if MIPS32_LE is defined,
 * else the generic C version. */
static void InitFunctionPointers(void)
{
#if defined(WEBRTC_HAS_NEON)
    InitPointersToNeon();
#elif defined(MIPS32_LE)
    InitPointersToMIPS();
#else
    InitPointersToC();
#endif /* WEBRTC_HAS_NEON */
}
#if defined(WEBRTC_POSIX)
#include <pthread.h>

/* Runs |func| through pthread_once() with a function-local control object. */
static void once(void (*func)(void))
{
    static pthread_once_t once_control = PTHREAD_ONCE_INIT;

    pthread_once(&once_control, func);
}

#elif defined(_WIN32)
#include <windows.h>

/* Runs |func| the first time this function is entered; later calls only take
 * and release the lock. */
static void once(void (*func)(void))
{
    /* Didn't use InitializeCriticalSection() since there's no race-free context
     * in which to execute it.
     *
     * TODO(kma): Change to different implementation (e.g.
     * InterlockedCompareExchangePointer) to avoid issues similar to
     * http://code.google.com/p/webm/issues/detail?id=467.
     */
    static CRITICAL_SECTION guard = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
    static int already_run = 0;

    EnterCriticalSection(&guard);
    if (!already_run) {
        func();
        already_run = 1;
    }
    LeaveCriticalSection(&guard);
}

/* There's no fallback version as an #else block here to ensure thread safety.
 * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
 * system should pick it up.
 */
#endif /* WEBRTC_POSIX */
/* Public entry point: hands InitFunctionPointers() to once(), which is
 * responsible for running it a single time. */
void WebRtcSpl_Init(void)
{
    once(InitFunctionPointers);
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_inl.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in
// n. Entries holding -1 are not reached by any such n.
const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
    /*  0..15 */ 32, 8,  17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
    /* 16..31 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  26, 25, 24,
    /* 32..47 */ 4,  11, 23, 31, 3,  7,  10, 16, 22, 30, -1, -1, 2,  6,  13, 9,
    /* 48..63 */ -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1,  27, 5,  12,
};
cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_sqrt.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Sqrt().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
int32_t WebRtcSpl_SqrtLocal(int32_t in);

// Evaluates the polynomial
//
//   t = 1 + x_half - 0.5*x_half^2 + 0.5*x_half^3 - 0.625*x_half^4
//         + 0.875*x_half^5
//
// in fixed point, where
//
//   y      = in / 2
//   x      = y - 2^30
//   x_half = x / 2^31
//
// and returns the accumulated result plus a rounding constant of 32768.
int32_t WebRtcSpl_SqrtLocal(int32_t in)
{
    int16_t x_half;
    int16_t high16;
    int32_t term;
    int32_t acc;
    int32_t x_sq;

    acc = in / 2;
    acc -= (int32_t)0x40000000;      // acc = in/2 - 1/2
    x_half = (int16_t)(acc >> 16);   // x_half = x/2 = (in-1)/2
    acc += (int32_t)0x40000000;      // acc = 1 + x/2
    acc += (int32_t)0x40000000;      // Add 0.5 twice (1.0 does not exist in Q31)

    x_sq = (int32_t)x_half * (int32_t)x_half * 2;  // x_sq = (x/2)^2
    term = -x_sq;                                  // term = -(x/2)^2
    acc += term >> 1;                // acc = 1 + x/2 - 0.5*(x/2)^2

    term >>= 16;
    term = term * term * 2;          // term = (x/2)^4
    high16 = (int16_t)(term >> 16);
    acc += -20480 * high16 * 2;      // acc = acc - 0.625*(x/2)^4

    term = x_half * high16 * 2;      // term = (x/2)^5
    high16 = (int16_t)(term >> 16);
    acc += 28672 * high16 * 2;       // acc = acc + 0.875*(x/2)^5

    high16 = (int16_t)(x_sq >> 16);
    term = x_half * high16 * 2;      // term = (x/2)^3
    acc += term >> 1;                // acc = acc + 0.5*(x/2)^3

    acc += (int32_t)32768;           // Round off bit

    return acc;
}
int32_t WebRtcSpl_Sqrt(int32_t value)
{
    /*
     Algorithm:

     A six term Taylor series is used to compute the square root of a number:
       y^0.5 = (1+x)^0.5 where x = y-1
             = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4)
                 + 0.875*((x/2)^5)
       0.5 <= x < 1

     Example of how the algorithm works, with ut=sqrt(in), and
     with in=73632 and ut=271 (even shift value case):

     in=73632
     y= in/131072
     x=y-1
     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
     ut=t*(1/sqrt(2))*512

     or:

     in=73632
     in2=73632*2^14
     y= in2/2^31
     x=y-1
     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
     ut=t*(1/sqrt(2))
     ut2=ut*2^9

     which gives:

     in  = 73632
     in2 = 1206386688
     y   = 0.56176757812500
     x   = -0.43823242187500
     t   = 0.74973506527313
     ut  = 0.53014274874797
     ut2 = 2.714330873589594e+002

     or:

     in=73632
     in2=73632*2^14
     y=in2/2
     x=y-2^30
     x_half=x/2^31
     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
         + 0.875*((x_half)^5)
     ut=t*(1/sqrt(2))
     ut2=ut*2^9

     which gives:

     in  = 73632
     in2 = 1206386688
     y   = 603193344
     x   = -470548480
     x_half =  -0.21911621093750
     t   = 0.74973506527313
     ut  = 0.53014274874797
     ut2 = 2.714330873589594e+002
     */

    const int16_t k_sqrt_2 = 23170;  // 1/sqrt2 (==5a82)
    int16_t x_norm;
    int16_t nshift;
    int16_t t16;
    int16_t sh;
    int32_t A;

    // sqrt(0) = 0
    if (value == 0) {
        return 0;
    }

    // The convention in this function is to calculate sqrt(abs(A)). Negate the
    // input if it is negative.
    A = value;
    if (A < 0) {
        // WEBRTC_SPL_WORD32_MIN cannot be held in an int32_t after negating,
        // so it is mapped to the maximum positive value instead.
        A = (A == WEBRTC_SPL_WORD32_MIN) ? WEBRTC_SPL_WORD32_MAX : -A;
    }

    sh = WebRtcSpl_NormW32(A);         // # shifts to normalize A
    A = WEBRTC_SPL_LSHIFT_W32(A, sh);  // Normalize A
    if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) {
        A = A + ((int32_t)32768);      // Round off bit
    } else {
        A = WEBRTC_SPL_WORD32_MAX;     // Adding the round-off bit would overflow
    }

    x_norm = (int16_t)(A >> 16);       // x_norm = AH (high 16 bits)

    nshift = (sh / 2);
    RTC_DCHECK_GE(nshift, 0);

    A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
    A = WEBRTC_SPL_ABS_W32(A);         // A = abs(x_norm<<16)
    A = WebRtcSpl_SqrtLocal(A);        // A = sqrt(A)

    if (2 * nshift == sh) {
        // Even shift value case: scale by 1/sqrt(2).
        t16 = (int16_t)(A >> 16);              // t16 = AH
        A = k_sqrt_2 * t16 * 2;                // A = 1/sqrt(2)*t16
        A = A + ((int32_t)32768);              // Round off
        A = A & ((int32_t)0x7fff0000);         // Keep bits 16..30 only
        A >>= 15;                              // Note: shifted by 15, not 16
    } else {
        A >>= 16;                              // A = A>>16
    }

    A = A & ((int32_t)0x0000ffff);
    A >>= nshift;                              // De-normalize the result.

    return A;
}
cpp_onnx/third_party/webrtc/common_audio/signal_processing/vector_scaling_operations.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_VectorBitShiftW16()
* WebRtcSpl_VectorBitShiftW32()
* WebRtcSpl_VectorBitShiftW32ToW16()
* WebRtcSpl_ScaleVector()
* WebRtcSpl_ScaleVectorWithSat()
* WebRtcSpl_ScaleAndAddVectors()
* WebRtcSpl_ScaleAndAddVectorsWithRoundC()
*/
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Shifts every element of |in| and stores the result in |res|.
//
// - res          [o] : Output vector, |length| elements.
// - length       [i] : Number of elements to process.
// - in           [i] : Input vector.
// - right_shifts [i] : > 0 shifts right by that many bits; otherwise each
//                      element is multiplied by 2^(-right_shifts).
void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
                                 const int16_t *in, int16_t right_shifts)
{
    size_t idx;

    if (right_shifts > 0) {
        for (idx = 0; idx < length; idx++) {
            res[idx] = (in[idx] >> right_shifts);
        }
        return;
    }

    // Zero or negative: scale up by multiplication rather than by shifting.
    for (idx = 0; idx < length; idx++) {
        res[idx] = (in[idx] * (1 << (-right_shifts)));
    }
}
// Shifts every element of |in_vector| and stores the result in |out_vector|.
//
// - out_vector    [o] : Output vector, |vector_length| elements.
// - vector_length [i] : Number of elements to process.
// - in_vector     [i] : Input vector.
// - right_shifts  [i] : > 0 shifts right by that many bits; otherwise the
//                       elements are shifted left by -right_shifts bits.
//
// The left shift is carried out on the unsigned representation and converted
// back, because left-shifting a negative signed value is undefined behaviour
// in C. Bits shifted out at the top are discarded.
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, size_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
    size_t i;

    if (right_shifts > 0) {
        for (i = 0; i < vector_length; i++) {
            out_vector[i] = in_vector[i] >> right_shifts;
        }
    } else {
        const int left_shifts = -right_shifts;

        for (i = 0; i < vector_length; i++) {
            out_vector[i] = (int32_t)((uint32_t)in_vector[i] << left_shifts);
        }
    }
}
// Shifts every 32-bit element of |in| and stores it, passed through
// WebRtcSpl_SatW32ToW16(), in the 16-bit vector |out|.
//
// - out          [o] : Output vector, |length| elements.
// - length       [i] : Number of elements to process.
// - in           [i] : Input vector.
// - right_shifts [i] : >= 0 shifts right by that many bits; a negative value
//                      shifts left by -right_shifts bits.
//
// The left shift is carried out on the unsigned representation and converted
// back, because left-shifting a negative signed value is undefined behaviour
// in C. Bits shifted out at the top are discarded before the 16-bit
// conversion is applied.
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t *out, size_t length,
                                      const int32_t *in, int right_shifts)
{
    size_t i;
    int32_t tmp_w32;

    if (right_shifts >= 0) {
        for (i = 0; i < length; i++) {
            tmp_w32 = in[i] >> right_shifts;
            out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
        }
    } else {
        const int left_shifts = -right_shifts;

        for (i = 0; i < length; i++) {
            tmp_w32 = (int32_t)((uint32_t)in[i] << left_shifts);
            out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
        }
    }
}
// Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
//
// - in_vector        [i] : Input vector.
// - out_vector       [o] : Output vector, |in_vector_length| elements.
// - gain             [i] : Scaling factor applied to every element.
// - in_vector_length [i] : Number of elements to process.
// - right_shifts     [i] : Right shift applied to every product.
//
// The shifted product is cast to int16_t without saturation.
void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, size_t in_vector_length,
                           int16_t right_shifts)
{
    size_t idx;

    for (idx = 0; idx < in_vector_length; idx++) {
        out_vector[idx] = (int16_t)((in_vector[idx] * gain) >> right_shifts);
    }
}
// Performs vector operation: out_vector = (gain*in_vector)>>right_shifts,
// with every shifted product passed through WebRtcSpl_SatW32ToW16() before
// it is stored.
//
// - in_vector        [i] : Input vector.
// - out_vector       [o] : Output vector, |in_vector_length| elements.
// - gain             [i] : Scaling factor applied to every element.
// - in_vector_length [i] : Number of elements to process.
// - right_shifts     [i] : Right shift applied to every product.
void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, size_t in_vector_length,
                                  int16_t right_shifts)
{
    size_t idx;

    for (idx = 0; idx < in_vector_length; idx++) {
        out_vector[idx] =
            WebRtcSpl_SatW32ToW16((in_vector[idx] * gain) >> right_shifts);
    }
}
// Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2
//
// - in1, gain1, shift1 [i] : First vector with its gain and right shift.
// - in2, gain2, shift2 [i] : Second vector with its gain and right shift.
// - out                [o] : Output vector, |vector_length| elements.
// - vector_length      [i] : Number of elements to process.
//
// Each scaled term is cast to int16_t before the two are added; no saturation
// is applied.
void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, size_t vector_length)
{
    size_t idx;

    for (idx = 0; idx < vector_length; idx++) {
        out[idx] = (int16_t)((gain1 * in1[idx]) >> shift1) +
                   (int16_t)((gain2 * in2[idx]) >> shift2);
    }
}
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
//
// Computes, for every element,
//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
//                    in_vector2[i] * in_vector2_scale +
//                    round_value) >> right_shifts
// where round_value is 2^(right_shifts - 1), or 0 when right_shifts is 0.
//
// returns : 0 on success, -1 if any pointer is NULL, |length| is 0 or
//           |right_shifts| is negative.
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t *in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t *in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t *out_vector,
                                           size_t length)
{
    size_t i = 0;
    int round_value;

    if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
        length == 0 || right_shifts < 0) {
        return -1;
    }

    // Computed only after |right_shifts| has been validated: shifting by a
    // negative count is undefined behaviour. Same value as
    // (1 << right_shifts) >> 1, without needing the extra top bit.
    round_value = (right_shifts > 0) ? (1 << (right_shifts - 1)) : 0;

    for (i = 0; i < length; i++) {
        out_vector[i] = (int16_t)((in_vector1[i] * in_vector1_scale +
                                   in_vector2[i] * in_vector2_scale +
                                   round_value) >> right_shifts);
    }

    return 0;
}
cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
0 → 100644
View file @
83ff3a7f
/*
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
* license.
*
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
* Date: Fri, Jun 24, 2011 at 3:20 AM
* Subject: Re: sqrt routine
* To: Kevin Ma <kma@google.com>
* Hi Kevin,
* Thanks for asking. Those routines are public domain (originally posted to
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
* Cheers,
* Wilco
*
* ----- Original Message -----
* From: "Kevin Ma" <kma@google.com>
* To: <Wilco.Dijkstra@ntlworld.com>
* Sent: Thursday, June 23, 2011 11:44 PM
* Subject: Fwd: sqrt routine
* Hi Wilco,
* I saw your sqrt routine from several web sites, including
* http://www.finesse.demon.co.uk/steven/sqrt.html.
* Just wonder if there's any copyright information with your Successive
* approximation routines, or if I can freely use it for any purpose.
* Thanks.
* Kevin
*/
// Minor modifications in code style for WebRTC, 2012.
#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
/*
 * Algorithm:
 * Successive approximation of the equation (root + delta) ^ 2 = N
 * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
 * Use delta = 2^i for i = 15 .. 0.
 *
 * Output precision is 16 bits. Note for large input values (close to
 * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
 * contains the MSB information (a non-sign value). Do with caution
 * if you need to cast the output to int16_t type.
 *
 * If the input value is negative, it returns 0.
 */
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
    /* |root| holds twice the partial result while iterating. */
    int32_t root = 0;
    int bit;

    for (bit = 15; bit >= 0; bit--) {
        const int32_t trial = root + (1 << bit);

        /* Accept this bit if the remainder can still absorb it. */
        if (value >= trial << bit) {
            value -= trial << bit;
            root |= 2 << bit;
        }
    }

    return root >> 1;
}
cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
//
// WebRtcSpl_SqrtFloor(...)
//
// Returns the square root of the input value |value|, rounded down to the
// nearest integer, i.e., sqrt(8) gives 2 as answer.
// If |value| is a negative number then 0 is returned.
//
// Algorithm:
//
// An iterative 4 cycle/bit routine
//
// Input:
//      - value     : Value to calculate sqrt of
//
// Return value     : Result of the sqrt calculation
//
int32_t WebRtcSpl_SqrtFloor(int32_t value);
cpp_onnx/third_party/webrtc/common_audio/vad/include/webrtc_vad.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the VAD API calls. Specific function calls are
* given below.
*/
#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
#include <stddef.h>
#include <stdint.h>
// Opaque handle type for a VAD instance.
typedef struct WebRtcVadInst VadInst;

#ifdef __cplusplus
extern "C" {
#endif

// Creates an instance to the VAD structure.
VadInst *WebRtcVad_Create(void);

// Frees the dynamic memory of a specified VAD instance.
//
// - handle [i] : Pointer to VAD instance that should be freed.
void WebRtcVad_Free(VadInst *handle);

// Initializes a VAD instance.
//
// - handle [i/o] : Instance that should be initialized.
//
// returns        : 0 - (OK),
//                 -1 - (null pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst *handle);

// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
// restrictive in reporting speech. Put in other words the probability of being
// speech when the VAD returns 1 is increased with increasing mode. As a
// consequence also the missed detection rate goes up.
//
// - handle [i/o] : VAD instance.
// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
//
// returns        : 0 - (OK),
//                 -1 - (null pointer, mode could not be set or the VAD instance
//                       has not been initialized).
int WebRtcVad_set_mode(VadInst *handle, int mode);

// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// and frame lengths, see the description of
// WebRtcVad_ValidRateAndFrameLength().
//
// - handle       [i/o] : VAD Instance. Needs to be initialized by
//                        WebRtcVad_Init() before call.
// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
// - audio_frame  [i]   : Audio frame buffer.
// - frame_length [i]   : Length of audio frame buffer in number of samples.
//
// returns              : 1 - (Active Voice),
//                        0 - (Non-active Voice),
//                       -1 - (Error)
int WebRtcVad_Process(VadInst *handle, int fs, const int16_t *audio_frame,
                      size_t frame_length);

// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate         [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns            : 0 - (valid combination), -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);

#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/rtc_base/sanitizer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "webrtc/common_audio/vad/vad_gmm.h"
#include "webrtc/common_audio/vad/vad_sp.h"
// Spectrum Weighting, one entry per channel.
static const int16_t kSpectrumWeight[kNumChannels] = {6, 8, 10, 12, 14, 16};

// Model update constants.
static const int16_t kNoiseUpdateConst = 655;    // Q15
static const int16_t kSpeechUpdateConst = 6554;  // Q15
static const int16_t kBackEta = 154;             // Q8

// Minimum difference between the two models, Q5
static const int16_t kMinimumDifference[kNumChannels] = {
    544, 544, 576, 576, 576, 576
};

// Upper limit of mean value for speech model, Q7
static const int16_t kMaximumSpeech[kNumChannels] = {
    11392, 11392, 11520, 11520, 11520, 11520
};

// Minimum value for mean value, one entry per Gaussian.
static const int16_t kMinimumMean[kNumGaussians] = {640, 768};

// Upper limit of mean value for noise model, Q7
static const int16_t kMaximumNoise[kNumChannels] = {
    9216, 9088, 8960, 8832, 8704, 8576
};
// Start values for the Gaussian models, Q7
//
// Every table below holds kTableSize entries, written here as two rows of six.
// NOTE(review): the row split assumes one row per Gaussian and one column per
// channel (matching the k * kNumChannels indexing in WeightedAverage()) --
// confirm against vad_core.h.

// Weights for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataWeights[kTableSize] = {
    34, 62, 72, 66, 53, 25,
    94, 66, 56, 62, 75, 103
};

// Weights for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataWeights[kTableSize] = {
    48, 82, 45, 87, 50, 47,
    80, 46, 83, 41, 78, 81
};

// Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[kTableSize] = {
    6738, 4892, 7065, 6715, 6771, 3369,
    7646, 3863, 7820, 7266, 5020, 4362
};

// Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[kTableSize] = {
    8306, 10085, 10078, 11823, 11843, 6309,
    9473, 9571,  10879, 7581,  8180,  7483
};

// Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[kTableSize] = {
    378, 1064, 493, 582, 688, 593,
    474, 697,  475, 688, 421, 455
};

// Stds for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataStds[kTableSize] = {
    555, 505, 567, 524,  585,  1231,
    509, 828, 492, 1540, 1079, 850
};
// Constants used in GmmProbability().

// Maximum number of counted speech (VAD = 1) frames in a row.
static const int16_t kMaxSpeechFrames = 6;

// Minimum standard deviation for both speech and noise.
static const int16_t kMinStd = 384;

// Constants in WebRtcVad_InitCore().

// Default aggressiveness mode.
static const short kDefaultMode = 0;

// NOTE(review): presumably the marker value stored once initialization has
// completed -- its use is not visible in this part of the file; confirm.
static const int kInitCheck = 42;
// Constants used in WebRtcVad_set_mode_core().
//
// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms), in that
// order within every table.

// Mode 0, Quality.
static const int16_t kOverHangMax1Q[3] = {8, 4, 3};
static const int16_t kOverHangMax2Q[3] = {14, 7, 5};
static const int16_t kLocalThresholdQ[3] = {24, 21, 24};
static const int16_t kGlobalThresholdQ[3] = {57, 48, 57};

// Mode 1, Low bitrate.
static const int16_t kOverHangMax1LBR[3] = {8, 4, 3};
static const int16_t kOverHangMax2LBR[3] = {14, 7, 5};
static const int16_t kLocalThresholdLBR[3] = {37, 32, 37};
static const int16_t kGlobalThresholdLBR[3] = {100, 80, 100};

// Mode 2, Aggressive.
static const int16_t kOverHangMax1AGG[3] = {6, 3, 2};
static const int16_t kOverHangMax2AGG[3] = {9, 5, 3};
static const int16_t kLocalThresholdAGG[3] = {82, 78, 82};
static const int16_t kGlobalThresholdAGG[3] = {285, 260, 285};

// Mode 3, Very aggressive.
static const int16_t kOverHangMax1VAG[3] = {6, 3, 2};
static const int16_t kOverHangMax2VAG[3] = {9, 5, 3};
static const int16_t kLocalThresholdVAG[3] = {94, 94, 94};
static const int16_t kGlobalThresholdVAG[3] = {1100, 1050, 1100};
// Shifts the per-Gaussian entries of one channel by |offset| (in place) and
// returns their weighted sum.
//
// Both |data| and |weights| point at the entry of Gaussian 0 for the channel
// of interest; the entry of Gaussian g lives |g * kNumChannels| elements
// further on.
//
// - data     [i/o] : Values to shift and average.
// - offset   [i]   : Amount added to every visited element of |data|.
// - weights  [i]   : Weight applied to each (already shifted) element.
//
// returns          : Sum over all Gaussians of shifted value times weight.
static int32_t WeightedAverage(int16_t* data, int16_t offset,
                               const int16_t* weights) {
  int32_t sum = 0;
  int g = 0;

  while (g < kNumGaussians) {
    const int idx = g * kNumChannels;

    // Apply the offset first; the stored (shifted) value is what gets weighted.
    data[idx] += offset;
    sum += data[idx] * weights[idx];
    ++g;
  }
  return sum;
}
// Multiplies a 16-bit value by a 32-bit value and returns the 32-bit product,
// without any overflow protection. Signed overflow is still undefined
// behavior; the attribute below merely silences UBSan so that legacy callers
// keep behaving the way they always have.
static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
    OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) {
  const int32_t product = a * b;
  return product;
}
// Calculates the probabilities for both speech and background noise using
// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which
// type of signal is most probable.
//
// The function has three stages:
//  1) If |total_power| exceeds |kMinEnergy|, evaluate the noise (H0) and speech
//     (H1) GMMs per channel and form a local and a global decision.
//  2) Still only if |total_power| exceeds |kMinEnergy|, update the means and
//     standard deviations stored in |self| and increment |frame_counter|.
//  3) Always apply hangover smoothing using |over_hang| and |num_of_speech|.
//
// - self           [i/o] : Pointer to VAD instance
// - features       [i]   : Feature vector of length |kNumChannels|
//                          = log10(energy in frequency band)
// - total_power    [i]   : Total power in audio frame.
// - frame_length   [i]   : Number of input samples. 80 and 160 select
//                          threshold index 0 and 1; any other value selects
//                          index 2.
//
// - returns              : the VAD decision. 0 - noise, 1 - speech detected in
//                          this frame, (2 + remaining hangover count) - no
//                          speech detected in this frame but hangover is still
//                          active.
static int16_t GmmProbability(VadInstT* self, int16_t* features,
                              int16_t total_power, size_t frame_length) {
  int channel, k;
  int16_t feature_minimum;
  int16_t h0, h1;
  int16_t log_likelihood_ratio;
  int16_t vadflag = 0;
  int16_t shifts_h0, shifts_h1;
  int16_t tmp_s16, tmp1_s16, tmp2_s16;
  int16_t diff;
  int gaussian;
  int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
  int16_t delt, ndelt;
  int16_t maxspe, maxmu;
  int16_t deltaN[kTableSize], deltaS[kTableSize];
  int16_t ngprvec[kTableSize] = { 0 };  // Conditional probability = 0.
  int16_t sgprvec[kTableSize] = { 0 };  // Conditional probability = 0.
  int32_t h0_test, h1_test;
  int32_t tmp1_s32, tmp2_s32;
  int32_t sum_log_likelihood_ratios = 0;
  int32_t noise_global_mean, speech_global_mean;
  int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians];
  int16_t overhead1, overhead2, individualTest, totalTest;

  // Set various thresholds based on frame lengths (80, 160 or 240 samples).
  if (frame_length == 80) {
    overhead1 = self->over_hang_max_1[0];
    overhead2 = self->over_hang_max_2[0];
    individualTest = self->individual[0];
    totalTest = self->total[0];
  } else if (frame_length == 160) {
    overhead1 = self->over_hang_max_1[1];
    overhead2 = self->over_hang_max_2[1];
    individualTest = self->individual[1];
    totalTest = self->total[1];
  } else {
    overhead1 = self->over_hang_max_1[2];
    overhead2 = self->over_hang_max_2[2];
    individualTest = self->individual[2];
    totalTest = self->total[2];
  }

  if (total_power > kMinEnergy) {
    // The signal power of current frame is large enough for processing. The
    // processing consists of two parts:
    // 1) Calculating the likelihood of speech and thereby a VAD decision.
    // 2) Updating the underlying model, w.r.t., the decision made.

    // The detection scheme is an LRT with hypothesis
    // H0: Noise
    // H1: Speech
    //
    // We combine a global LRT with local tests, for each frequency sub-band,
    // here defined as |channel|.
    for (channel = 0; channel < kNumChannels; channel++) {
      // For each channel we model the probability with a GMM consisting of
      // |kNumGaussians|, with different means and standard deviations depending
      // on H0 or H1.
      h0_test = 0;
      h1_test = 0;
      for (k = 0; k < kNumGaussians; k++) {
        // Flat index into the kTableSize long parameter tables.
        gaussian = channel + k * kNumChannels;
        // Probability under H0, that is, probability of frame being noise.
        // Value given in Q27 = Q7 * Q20.
        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
                                                 self->noise_means[gaussian],
                                                 self->noise_stds[gaussian],
                                                 &deltaN[gaussian]);
        noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32;
        h0_test += noise_probability[k];  // Q27

        // Probability under H1, that is, probability of frame being speech.
        // Value given in Q27 = Q7 * Q20.
        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
                                                 self->speech_means[gaussian],
                                                 self->speech_stds[gaussian],
                                                 &deltaS[gaussian]);
        speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32;
        h1_test += speech_probability[k];  // Q27
      }

      // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}).
      // Approximation:
      // log2(Pr{X|H1} / Pr{X|H0}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H0}*2^Q)
      //                           = log2(h1_test) - log2(h0_test)
      //                           = log2(2^(31-shifts_h1)*(1+b1))
      //                             - log2(2^(31-shifts_h0)*(1+b0))
      //                           = shifts_h0 - shifts_h1
      //                             + log2(1+b1) - log2(1+b0)
      //                          ~= shifts_h0 - shifts_h1
      //
      // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1.
      // Further, b0 and b1 are independent and on the average the two terms
      // cancel.
      shifts_h0 = WebRtcSpl_NormW32(h0_test);
      shifts_h1 = WebRtcSpl_NormW32(h1_test);
      // A zero probability is treated as the smallest representable one.
      if (h0_test == 0) {
        shifts_h0 = 31;
      }
      if (h1_test == 0) {
        shifts_h1 = 31;
      }
      log_likelihood_ratio = shifts_h0 - shifts_h1;

      // Update |sum_log_likelihood_ratios| with spectrum weighting. This is
      // used for the global VAD decision.
      sum_log_likelihood_ratios +=
          (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);

      // Local VAD decision.
      if ((log_likelihood_ratio * 4) > individualTest) {
        vadflag = 1;
      }

      // TODO(bjornv): The conditional probabilities below are applied on the
      // hard coded number of Gaussians set to two. Find a way to generalize.
      // Calculate local noise probabilities used later when updating the GMM.
      h0 = (int16_t) (h0_test >> 12);  // Q15
      if (h0 > 0) {
        // High probability of noise. Assign conditional probabilities for each
        // Gaussian in the GMM.
        tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2;  // Q29
        ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0);  // Q14
        ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel];
      } else {
        // Low noise probability. Assign conditional probability 1 to the first
        // Gaussian and 0 to the rest (which is already set at initialization).
        ngprvec[channel] = 16384;
      }

      // Calculate local speech probabilities used later when updating the GMM.
      h1 = (int16_t) (h1_test >> 12);  // Q15
      if (h1 > 0) {
        // High probability of speech. Assign conditional probabilities for each
        // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0.
        tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2;  // Q29
        sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1);  // Q14
        sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel];
      }
    }

    // Make a global VAD decision.
    vadflag |= (sum_log_likelihood_ratios >= totalTest);

    // Update the model parameters.
    // |maxspe| is the speech mean limit of the previously processed channel
    // (12800 before the first channel); it is refreshed at the end of each
    // channel iteration.
    maxspe = 12800;
    for (channel = 0; channel < kNumChannels; channel++) {

      // Get minimum value in past which is used for long term correction in Q4.
      feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel);

      // Compute the "global" mean, that is the sum of the two means weighted.
      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
                                          &kNoiseDataWeights[channel]);
      tmp1_s16 = (int16_t) (noise_global_mean >> 6);  // Q8

      for (k = 0; k < kNumGaussians; k++) {
        gaussian = channel + k * kNumChannels;

        // Snapshot the current parameters; |nmk| and |smk| keep the values
        // from before this frame's update.
        nmk = self->noise_means[gaussian];
        smk = self->speech_means[gaussian];
        nsk = self->noise_stds[gaussian];
        ssk = self->speech_stds[gaussian];

        // Update noise mean vector if the frame consists of noise only.
        nmk2 = nmk;
        if (!vadflag) {
          // deltaN = (x-mu)/sigma^2
          // ngprvec[k] = |noise_probability[k]| /
          //   (|noise_probability[0]| + |noise_probability[1]|)

          // (Q14 * Q11 >> 11) = Q14.
          delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11);
          // Q7 + (Q14 * Q15 >> 22) = Q7.
          nmk2 = nmk + (int16_t)((delt * kNoiseUpdateConst) >> 22);
        }

        // Long term correction of the noise mean.
        // Q8 - Q8 = Q8.
        ndelt = (feature_minimum << 4) - tmp1_s16;
        // Q7 + (Q8 * Q8) >> 9 = Q7.
        nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9);

        // Control that the noise mean does not drift too much.
        tmp_s16 = (int16_t) ((k + 5) << 7);
        if (nmk3 < tmp_s16) {
          nmk3 = tmp_s16;
        }
        tmp_s16 = (int16_t) ((72 + k - channel) << 7);
        if (nmk3 > tmp_s16) {
          nmk3 = tmp_s16;
        }
        self->noise_means[gaussian] = nmk3;

        if (vadflag) {
          // Update speech mean vector:
          // |deltaS| = (x-mu)/sigma^2
          // sgprvec[k] = |speech_probability[k]| /
          //   (|speech_probability[0]| + |speech_probability[1]|)

          // (Q14 * Q11) >> 11 = Q14.
          delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11);
          // Q14 * Q15 >> 21 = Q8.
          tmp_s16 = (int16_t)((delt * kSpeechUpdateConst) >> 21);
          // Q7 + (Q8 >> 1) = Q7. With rounding.
          smk2 = smk + ((tmp_s16 + 1) >> 1);

          // Control that the speech mean does not drift too much.
          maxmu = maxspe + 640;
          if (smk2 < kMinimumMean[k]) {
            smk2 = kMinimumMean[k];
          }
          if (smk2 > maxmu) {
            smk2 = maxmu;
          }
          self->speech_means[gaussian] = smk2;  // Q7.

          // Note that the standard deviation update below uses the mean from
          // before the update (|smk|), not |smk2|.
          // (Q7 >> 3) = Q4. With rounding.
          tmp_s16 = ((smk + 4) >> 3);

          tmp_s16 = features[channel] - tmp_s16;  // Q4
          // (Q11 * Q4 >> 3) = Q12.
          tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3;
          tmp2_s32 = tmp1_s32 - 4096;
          tmp_s16 = sgprvec[gaussian] >> 2;
          // (Q14 >> 2) * Q12 = Q24.
          tmp1_s32 = tmp_s16 * tmp2_s32;

          tmp2_s32 = tmp1_s32 >> 4;  // Q20

          // 0.1 * Q20 / Q7 = Q13.
          // The division is done on the magnitude and the sign is restored
          // afterwards.
          if (tmp2_s32 > 0) {
            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10);
          } else {
            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10);
            tmp_s16 = -tmp_s16;
          }
          // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4).
          // Note that division by 4 equals shift by 2, hence,
          // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7.
          tmp_s16 += 128;  // Rounding.
          ssk += (tmp_s16 >> 8);
          if (ssk < kMinStd) {
            ssk = kMinStd;
          }
          self->speech_stds[gaussian] = ssk;
        } else {
          // Update GMM variance vectors.
          // deltaN * (features[channel] - nmk) - 1
          // Q4 - (Q7 >> 3) = Q4.
          tmp_s16 = features[channel] - (nmk >> 3);
          // (Q11 * Q4 >> 3) = Q12.
          tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3;
          tmp1_s32 -= 4096;

          // (Q14 >> 2) * Q12 = Q24.
          tmp_s16 = (ngprvec[gaussian] + 2) >> 2;
          // This multiplication is allowed to overflow; see
          // OverflowingMulS16ByS32ToS32().
          tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32);
          // Q20  * approx 0.001 (2^-10=0.0009766), hence,
          // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
          tmp1_s32 = tmp2_s32 >> 14;

          // Q20 / Q7 = Q13.
          // The division is done on the magnitude and the sign is restored
          // afterwards.
          if (tmp1_s32 > 0) {
            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk);
          } else {
            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk);
            tmp_s16 = -tmp_s16;
          }
          tmp_s16 += 32;  // Rounding
          nsk += tmp_s16 >> 6;  // Q13 >> 6 = Q7.
          if (nsk < kMinStd) {
            nsk = kMinStd;
          }
          self->noise_stds[gaussian] = nsk;
        }
      }

      // Separate models if they are too close.
      // |noise_global_mean| in Q14 (= Q7 * Q7).
      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
                                          &kNoiseDataWeights[channel]);

      // |speech_global_mean| in Q14 (= Q7 * Q7).
      speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
                                           &kSpeechDataWeights[channel]);

      // |diff| = "global" speech mean - "global" noise mean.
      // (Q14 >> 9) - (Q14 >> 9) = Q5.
      diff = (int16_t) (speech_global_mean >> 9) -
          (int16_t) (noise_global_mean >> 9);
      if (diff < kMinimumDifference[channel]) {
        tmp_s16 = kMinimumDifference[channel] - diff;

        // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
        // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
        tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2);
        tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2);

        // Move Gaussian means for speech model by |tmp1_s16| and update
        // |speech_global_mean|. Note that |self->speech_means[channel]| is
        // changed after the call.
        speech_global_mean = WeightedAverage(&self->speech_means[channel],
                                             tmp1_s16,
                                             &kSpeechDataWeights[channel]);

        // Move Gaussian means for noise model by -|tmp2_s16| and update
        // |noise_global_mean|. Note that |self->noise_means[channel]| is
        // changed after the call.
        noise_global_mean = WeightedAverage(&self->noise_means[channel],
                                            -tmp2_s16,
                                            &kNoiseDataWeights[channel]);
      }

      // Control that the speech & noise means do not drift too much.
      maxspe = kMaximumSpeech[channel];
      tmp2_s16 = (int16_t) (speech_global_mean >> 7);
      if (tmp2_s16 > maxspe) {
        // Upper limit of speech model.
        tmp2_s16 -= maxspe;

        for (k = 0; k < kNumGaussians; k++) {
          self->speech_means[channel + k * kNumChannels] -= tmp2_s16;
        }
      }

      tmp2_s16 = (int16_t) (noise_global_mean >> 7);
      if (tmp2_s16 > kMaximumNoise[channel]) {
        // Upper limit of noise model.
        tmp2_s16 -= kMaximumNoise[channel];

        for (k = 0; k < kNumGaussians; k++) {
          self->noise_means[channel + k * kNumChannels] -= tmp2_s16;
        }
      }
    }
    // Only frames with enough energy to be processed are counted.
    self->frame_counter++;
  }

  // Smooth with respect to transition hysteresis.
  if (!vadflag) {
    if (self->over_hang > 0) {
      // No speech detected in this frame, but the hangover is still running:
      // report a value above 1 that encodes the remaining hangover count.
      vadflag = 2 + self->over_hang;
      self->over_hang--;
    }
    self->num_of_speech = 0;
  } else {
    self->num_of_speech++;
    if (self->num_of_speech > kMaxSpeechFrames) {
      // Long run of speech frames: use the longer hangover.
      self->num_of_speech = kMaxSpeechFrames;
      self->over_hang = overhead2;
    } else {
      self->over_hang = overhead1;
    }
  }
  return vadflag;
}
// Resets a VAD instance to its initial state and selects the default
// aggressiveness mode (|kDefaultMode|).
//
// - self [i/o] : Instance to reset.
//
// returns      : 0 on success, -1 if |self| is NULL or the default mode could
//                not be applied. |init_flag| is only set on success.
int WebRtcVad_InitCore(VadInstT* self) {
  int n;

  if (!self) {
    return -1;
  }

  // General bookkeeping. The decision starts out as "speech active" (=1).
  self->vad = 1;
  self->frame_counter = 0;
  self->over_hang = 0;
  self->num_of_speech = 0;

  // Clear all filter memories: downsampling, 48 -> 8 kHz resampling,
  // band splitting and high pass.
  memset(self->downsampling_filter_states, 0,
         sizeof(self->downsampling_filter_states));
  WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8);
  memset(self->upper_state, 0, sizeof(self->upper_state));
  memset(self->lower_state, 0, sizeof(self->lower_state));
  memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));

  // Load the initial PDF parameters (kTableSize entries per table).
  memcpy(self->noise_means, kNoiseDataMeans, sizeof(self->noise_means));
  memcpy(self->speech_means, kSpeechDataMeans, sizeof(self->speech_means));
  memcpy(self->noise_stds, kNoiseDataStds, sizeof(self->noise_stds));
  memcpy(self->speech_stds, kSpeechDataStds, sizeof(self->speech_stds));

  // Index and minimum value vectors.
  memset(self->index_vector, 0, sizeof(self->index_vector));
  for (n = 0; n < 16 * kNumChannels; ++n) {
    self->low_value_vector[n] = 10000;
  }

  // Mean value memory, for WebRtcVad_FindMinimum().
  for (n = 0; n < kNumChannels; ++n) {
    self->mean_value[n] = 1600;
  }

  // Apply the default aggressiveness mode last; a failure leaves the instance
  // unmarked.
  if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
    return -1;
  }

  self->init_flag = kInitCheck;
  return 0;
}
// Selects the aggressiveness mode by copying the matching hangover and
// threshold tables into the instance.
//
// - self [i/o] : Instance to configure.
// - mode [i]   : 0 (quality), 1 (low bitrate), 2 (aggressive) or
//                3 (very aggressive).
//
// returns      : 0 on success, -1 for an unknown |mode| (in which case |self|
//                is left untouched).
int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
  const int16_t* hang_short;
  const int16_t* hang_long;
  const int16_t* local_threshold;
  const int16_t* global_threshold;

  // Pick the table set for the requested mode.
  switch (mode) {
    case 0:  // Quality mode.
      hang_short = kOverHangMax1Q;
      hang_long = kOverHangMax2Q;
      local_threshold = kLocalThresholdQ;
      global_threshold = kGlobalThresholdQ;
      break;
    case 1:  // Low bitrate mode.
      hang_short = kOverHangMax1LBR;
      hang_long = kOverHangMax2LBR;
      local_threshold = kLocalThresholdLBR;
      global_threshold = kGlobalThresholdLBR;
      break;
    case 2:  // Aggressive mode.
      hang_short = kOverHangMax1AGG;
      hang_long = kOverHangMax2AGG;
      local_threshold = kLocalThresholdAGG;
      global_threshold = kGlobalThresholdAGG;
      break;
    case 3:  // Very aggressive mode.
      hang_short = kOverHangMax1VAG;
      hang_long = kOverHangMax2VAG;
      local_threshold = kLocalThresholdVAG;
      global_threshold = kGlobalThresholdVAG;
      break;
    default:
      return -1;
  }

  // Every table holds one entry per supported frame length.
  memcpy(self->over_hang_max_1, hang_short, sizeof(self->over_hang_max_1));
  memcpy(self->over_hang_max_2, hang_long, sizeof(self->over_hang_max_2));
  memcpy(self->individual, local_threshold, sizeof(self->individual));
  memcpy(self->total, global_threshold, sizeof(self->total));

  return 0;
}
// Calculate VAD decision by first extracting feature values and then calculate
// probability for both speech and background noise.
//
// The 48 kHz input is resampled to 8 kHz in chunks of 10 ms (480 input samples
// giving 80 output samples each) and the resulting narrowband frame is passed
// to WebRtcVad_CalcVad8khz().
//
// - inst          [i/o] : VAD instance; the resampler state and the GMM are
//                         updated.
// - speech_frame  [i]   : Input frame sampled at 48 kHz.
// - frame_length  [i]   : Number of input samples; at most 1440 (30 ms).
//
// returns               : The VAD decision from WebRtcVad_CalcVad8khz(), or -1
//                         if |frame_length| exceeds 30 ms.
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  int vad;
  size_t i;
  int16_t speech_nb[240];  // 30 ms in 8 kHz.
  // |tmp_mem| is a temporary memory used by resample function, length is
  // frame length in 10 ms (480 samples) + 256 extra.
  int32_t tmp_mem[480 + 256] = { 0 };
  const size_t kFrameLen10ms48khz = 480;
  const size_t kFrameLen10ms8khz = 80;
  size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;

  // |speech_nb| only has room for 30 ms; refuse longer frames instead of
  // writing (and later reading) past the end of the buffer.
  if (frame_length > 3 * kFrameLen10ms48khz) {
    return -1;
  }

  for (i = 0; i < num_10ms_frames; i++) {
    // Advance the input by one 10 ms chunk per iteration so that every part
    // of the frame is resampled, not just the first 10 ms over and over.
    WebRtcSpl_Resample48khzTo8khz(&speech_frame[i * kFrameLen10ms48khz],
                                  &speech_nb[i * kFrameLen10ms8khz],
                                  &inst->state_48_to_8,
                                  tmp_mem);
  }

  // Do VAD on an 8 kHz signal
  vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);

  return vad;
}
// Makes a VAD decision for a frame sampled at 32 kHz by halving the sample
// rate twice (32 -> 16 -> 8 kHz) and running the 8 kHz detector.
//
// - inst          [i/o] : VAD instance; downsampling states and the GMM are
//                         updated.
// - speech_frame  [i]   : Input frame sampled at 32 kHz.
// - frame_length  [i]   : Number of input samples.
//
// returns               : The VAD decision from WebRtcVad_CalcVad8khz().
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  // Intermediate 16 kHz frame: up to 480 samples, i.e. a 30 ms frame of 960
  // samples at 32 kHz halved.
  int16_t wideband[480];
  // Final 8 kHz frame: up to 240 samples, i.e. the 16 kHz frame halved.
  int16_t narrowband[240];
  const size_t wideband_length = frame_length / 2;
  const size_t narrowband_length = wideband_length / 2;

  // 32 -> 16 kHz uses the filter states starting at index 2.
  WebRtcVad_Downsampling(speech_frame, wideband,
                         &(inst->downsampling_filter_states[2]), frame_length);

  // 16 -> 8 kHz uses the filter states starting at index 0.
  WebRtcVad_Downsampling(wideband, narrowband,
                         inst->downsampling_filter_states, wideband_length);

  // Do VAD on an 8 kHz signal.
  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
}
// Makes a VAD decision for a frame sampled at 16 kHz by halving the sample
// rate and running the 8 kHz detector.
//
// - inst          [i/o] : VAD instance; downsampling state and the GMM are
//                         updated.
// - speech_frame  [i]   : Input frame sampled at 16 kHz.
// - frame_length  [i]   : Number of input samples.
//
// returns               : The VAD decision from WebRtcVad_CalcVad8khz().
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {
  // 8 kHz frame: up to 240 samples, i.e. a 30 ms frame of 480 samples at
  // 16 kHz halved.
  int16_t narrowband[240];
  const size_t narrowband_length = frame_length / 2;

  // Wideband: Downsample signal before doing VAD.
  WebRtcVad_Downsampling(speech_frame, narrowband,
                         inst->downsampling_filter_states, frame_length);

  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
}
// Makes a VAD decision for a frame sampled at 8 kHz: extracts the per-band
// features and feeds them, together with the total power, to the GMM based
// detector. The decision is stored in |inst->vad| and returned.
//
// - inst          [i/o] : VAD instance.
// - speech_frame  [i]   : Input frame sampled at 8 kHz.
// - frame_length  [i]   : Number of input samples.
//
// returns               : The VAD decision made by GmmProbability().
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
                          size_t frame_length) {
  int16_t band_features[kNumChannels];

  // Get power in the bands.
  const int16_t frame_power = WebRtcVad_CalculateFeatures(
      inst, speech_frame, frame_length, band_features);

  // Make a VAD.
  inst->vad = GmmProbability(inst, band_features, frame_power, frame_length);

  return inst->vad;
}
cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the descriptions of the core VAD calls.
*/
#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
#define COMMON_AUDIO_VAD_VAD_CORE_H_
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Compile-time dimensions of the VAD model. They are declared as enum
// constants so that they can be used as array sizes.

enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// Total number of Gaussians, i.e. the length of every GMM parameter table.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
// State of one VAD instance. Initialized by WebRtcVad_InitCore().
typedef struct VadInstT_ {
  // Latest VAD decision; set to 1 at initialization and updated by
  // WebRtcVad_CalcVad8khz().
  int vad;
  // Filter states for the downsampling by two. Index 0 onwards is used for
  // 16 -> 8 kHz and index 2 onwards for 32 -> 16 kHz.
  int32_t downsampling_filter_states[4];
  // Resampler state used by WebRtcVad_CalcVad48khz().
  WebRtcSpl_State48khzTo8khz state_48_to_8;
  // GMM parameters, indexed as |channel + k * kNumChannels|.
  int16_t noise_means[kTableSize];
  int16_t speech_means[kTableSize];
  int16_t noise_stds[kTableSize];
  int16_t speech_stds[kTableSize];
  // TODO(bjornv): Change to |frame_count|.
  // Number of frames whose power was high enough to be processed by the GMM.
  int32_t frame_counter;
  int16_t over_hang;  // Over Hang
  // Number of consecutive frames classified as speech (clamped).
  int16_t num_of_speech;
  // TODO(bjornv): Change to |age_vector|.
  // Initialized to all zeros.
  int16_t index_vector[16 * kNumChannels];
  // Initialized to 10000 in every element.
  int16_t low_value_vector[16 * kNumChannels];
  // TODO(bjornv): Change to |median|.
  // Mean value memory for WebRtcVad_FindMinimum(); initialized to 1600.
  int16_t mean_value[kNumChannels];
  // Splitting filter states.
  int16_t upper_state[5];
  int16_t lower_state[5];
  // High pass filter states.
  int16_t hp_filter_state[4];
  // Mode dependent settings, one entry per frame length (10, 20 and 30 ms).
  // Set by WebRtcVad_set_mode_core().
  int16_t over_hang_max_1[3];
  int16_t over_hang_max_2[3];
  int16_t individual[3];  // Per-channel (local) decision thresholds.
  int16_t total[3];       // Global decision thresholds.

  // Marker written at the end of a successful WebRtcVad_InitCore().
  int init_flag;
} VadInstT;
// Initializes the core VAD component. The default aggressiveness mode is
// controlled by |kDefaultMode| in vad_core.c.
//
// - self [i/o] : Instance that should be initialized
//
// returns      : 0 (OK), -1 (null pointer in or if the default mode can't be
//                set)
int WebRtcVad_InitCore(VadInstT* self);

/****************************************************************************
 * WebRtcVad_set_mode_core(...)
 *
 * This function changes the VAD settings
 *
 * Input:
 *      - self      : VAD instance
 *      - mode      : Aggressiveness degree
 *                    0 (High quality) - 3 (Highly aggressive)
 *
 * Output:
 *      - self      : Changed instance
 *
 * Return value     :  0 - Ok
 *                    -1 - Error (unknown mode)
 */

int WebRtcVad_set_mode_core(VadInstT* self, int mode);

/****************************************************************************
 * WebRtcVad_CalcVad48khz(...)
 * WebRtcVad_CalcVad32khz(...)
 * WebRtcVad_CalcVad16khz(...)
 * WebRtcVad_CalcVad8khz(...)
 *
 * Calculate probability for active speech and make VAD decision.
 *
 * Input:
 *      - inst          : Initialized VAD instance
 *      - speech_frame  : Input speech frame
 *      - frame_length  : Number of input samples
 *
 * Output:
 *      - inst          : Updated filter states etc.
 *
 * Return value         : VAD decision
 *                        0 - No active speech
 *                        1 - Active speech detected in this frame
 *                        >1 - No speech detected in this frame, but the
 *                             hangover from earlier speech is still active
 */
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
                          size_t frame_length);
#endif // COMMON_AUDIO_VAD_VAD_CORE_H_
cpp_onnx/third_party/webrtc/common_audio/vad/vad_filterbank.c
0 → 100644
View file @
83ff3a7f
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Constants used in LogOfEnergy().
static const int16_t kLogConst = 24660;  // 160*log10(2) in Q9.
// Integer part of log2 of a value normalized to 15 bits.
static const int16_t kLogEnergyIntPart = 14336;  // 14 in Q10

// Coefficients used by HighPassFilter, Q14.
// Index 0 of |kHpPoleCoefs| (16384, i.e. 1.0 in Q14) is not read by
// HighPassFilter(); only indices 1 and 2 are used in the feedback path.
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };

// Allpass filter coefficients, upper and lower, in Q15.
// Upper: 0.64, Lower: 0.17
static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };

// Adjustment for division with two in SplitFilter.
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
// Second order high pass filter. With an input sampled at 500 Hz the cut-off
// frequency is at 80 Hz.
//
// - data_in      [i]   : Input audio data sampled at 500 Hz.
// - data_length  [i]   : Number of samples to read from |data_in| and write to
//                        |data_out|.
// - filter_state [i/o] : Four element filter memory. Elements 0 and 1 hold the
//                        two most recent inputs, elements 2 and 3 the two most
//                        recent outputs.
// - data_out     [o]   : Output audio data in the frequency interval
//                        80 - 250 Hz.
static void HighPassFilter(const int16_t* data_in, size_t data_length,
                           int16_t* filter_state, int16_t* data_out) {
  size_t n;

  // The sum of the absolute values of the impulse response:
  // The zero/pole-filter has a max amplification of a single sample of: 1.4546
  // Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
  // The all-zero section has a max amplification of a single sample of: 1.6189
  // Impulse response: 0.4047 -0.8094  0.4047  0       0        0
  // The all-pole section has a max amplification of a single sample of: 1.9931
  // Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
  for (n = 0; n < data_length; ++n) {
    const int16_t input = data_in[n];
    int32_t acc;

    // Feed-forward (all-zero) part, coefficients in Q14.
    acc = kHpZeroCoefs[0] * input;
    acc += kHpZeroCoefs[1] * filter_state[0];
    acc += kHpZeroCoefs[2] * filter_state[1];
    filter_state[1] = filter_state[0];
    filter_state[0] = input;

    // Feedback (all-pole) part, coefficients in Q14.
    acc -= kHpPoleCoefs[1] * filter_state[2];
    acc -= kHpPoleCoefs[2] * filter_state[3];
    filter_state[3] = filter_state[2];
    filter_state[2] = (int16_t) (acc >> 14);

    data_out[n] = filter_state[2];
  }
}
// First order all pass filter that reads every second sample of |data_in|.
// It is applied to each branch before the signal is split into a low pass and
// a high pass band.
// Note that |data_in| and |data_out| can NOT correspond to the same address.
//
// - data_in            [i]   : Input audio signal given in Q0. Samples
//                              0, 2, 4, ... are read.
// - data_length        [i]   : Number of output samples to produce.
// - filter_coefficient [i]   : Given in Q15.
// - filter_state       [i/o] : State of the filter given in Q(-1).
// - data_out           [o]   : Output audio signal given in Q(-1).
static void AllPassFilter(const int16_t* data_in, size_t data_length,
                          int16_t filter_coefficient, int16_t* filter_state,
                          int16_t* data_out) {
  // The filter can only cause overflow (in the w16 output variable)
  // if more than 4 consecutive input numbers are of maximum value and
  // has the the same sign as the impulse responses first taps.
  // First 6 taps of the impulse response:
  // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990
  size_t n;
  int32_t state_q15 = ((int32_t) (*filter_state) * (1 << 16));  // Q15

  for (n = 0; n < data_length; ++n) {
    const int32_t sum = state_q15 + filter_coefficient * data_in[2 * n];
    const int16_t output = (int16_t) (sum >> 16);  // Q(-1)

    data_out[n] = output;
    // Next state: input in Q14 minus the fed back output, then doubled to
    // reach Q15.
    state_q15 = (data_in[2 * n] * (1 << 14)) - filter_coefficient * output;
    state_q15 *= 2;
  }

  *filter_state = (int16_t) (state_q15 >> 16);  // Q(-1)
}
// Divides |data_in| into an upper (high pass) and a lower (low pass) half band,
// each at half the input rate. Even input samples go through the upper all
// pass branch and odd input samples through the lower one; the difference and
// the sum of the two branches form the high pass and low pass outputs.
//
// - data_in      [i]   : Input audio data to be split into two frequency bands.
// - data_length  [i]   : Length of |data_in|.
// - upper_state  [i/o] : State of the upper filter, given in Q(-1).
// - lower_state  [i/o] : State of the lower filter, given in Q(-1).
// - hp_data_out  [o]   : Output audio data of the upper half of the spectrum.
//                        The length is |data_length| / 2.
// - lp_data_out  [o]   : Output audio data of the lower half of the spectrum.
//                        The length is |data_length| / 2.
static void SplitFilter(const int16_t* data_in, size_t data_length,
                        int16_t* upper_state, int16_t* lower_state,
                        int16_t* hp_data_out, int16_t* lp_data_out) {
  const size_t half_length = data_length >> 1;  // Downsampling by 2.
  size_t n;

  // Upper branch: starts at sample 0.
  AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,
                hp_data_out);

  // Lower branch: starts at sample 1.
  AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,
                lp_data_out);

  // Combine the branches: HP = upper - lower, LP = lower + upper.
  for (n = 0; n < half_length; ++n) {
    const int16_t upper = hp_data_out[n];

    hp_data_out[n] -= lp_data_out[n];
    lp_data_out[n] += upper;
  }
}
// Computes the energy of |data_in| in dB and, when needed, bumps an external
// running |total_energy|.
//
// - data_in      [i]   : Input audio data for energy calculation.
// - data_length  [i]   : Length of input data.
// - offset       [i]   : Offset value added to |log_energy|.
// - total_energy [i/o] : An external energy updated with the energy of
//                        |data_in|.
//                        NOTE: |total_energy| is only updated if
//                        |total_energy| <= |kMinEnergy|, and never when the
//                        energy of |data_in| is zero.
// - log_energy   [o]   : 10 * log10("energy of |data_in|") given in Q4, plus
//                        |offset|. For zero energy the result is |offset|.
static void LogOfEnergy(const int16_t* data_in, size_t data_length,
                        int16_t offset, int16_t* total_energy,
                        int16_t* log_energy) {
  // Total number of right shifts that have been applied to |energy|.
  int tot_rshifts = 0;
  // Unsigned, because the fractional part is masked out further down.
  uint32_t energy = 0;
  int normalizing_rshifts;
  int16_t log2_energy;
  int16_t result;

  RTC_DCHECK(data_in);
  RTC_DCHECK_GT(data_length, 0);

  energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
                                       &tot_rshifts);

  // Nothing to take the logarithm of: report the offset only.
  if (energy == 0) {
    *log_energy = offset;
    return;
  }

  // Normalize |energy| to 15 bits, which for an unsigned 32 bit value means
  // 17 leading zeros. Afterwards |energy| is in Q(-tot_rshifts).
  normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
  tot_rshifts += normalizing_rshifts;
  if (normalizing_rshifts < 0) {
    energy <<= -normalizing_rshifts;
  } else {
    energy >>= normalizing_rshifts;
  }

  // Energy of |data_in| in dB, in Q4:
  //
  // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
  // 160 * log10(|energy| * 2^|tot_rshifts|) =
  // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
  // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
  // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
  // |kLogConst| * (|log2_energy| + |tot_rshifts|)
  //
  // With |energy| normalized to 15 bits we have
  // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15,
  // and |log2_energy| in Q10 becomes
  // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
  // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
  // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
  // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
  // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
  //
  // where frac_Q15 = (|energy| & 0x00003FFF).
  log2_energy = kLogEnergyIntPart;
  log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);

  // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0; the
  // shifts bring both terms to Q4.
  result = (int16_t) (((kLogConst * log2_energy) >> 19) +
      ((tot_rshifts * kLogConst) >> 9));
  if (result < 0) {
    result = 0;
  }
  result += offset;
  *log_energy = result;

  // Update the approximate |total_energy| with the energy of |data_in|, if
  // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
  // energy indicator in GmmProbability() in vad_core.c.
  if (*total_energy <= kMinEnergy) {
    if (tot_rshifts >= 0) {
      // By construction |energy| > |kMinEnergy| in Q0 here, so add an
      // arbitrary value that pushes |total_energy| above |kMinEnergy|.
      *total_energy += kMinEnergy + 1;
    } else {
      // |energy| is represented by 15 bits, hence any right shifted version
      // fits in an int16_t. Adding it to |total_energy| is wrap around safe
      // as long as |kMinEnergy| < 8192.
      *total_energy += (int16_t) (energy >> -tot_rshifts);  // Q0.
    }
  }
}
// Splits |data_in| into six frequency bands through a cascade of two-way
// split filters and stores the log energy of each band in |features|.
//
// - self        : VAD instance; supplies the per-stage split filter states
//                 (|upper_state|, |lower_state|) and |hp_filter_state|.
// - data_in     : input samples.
// - data_length : number of samples in |data_in|. Expected to be 80, 160 or
//                 240 (10, 20 or 30 ms at 8 kHz); only checked by
//                 RTC_DCHECK_LE against 240.
// - features    : output; entries [0] through [5] are written.
//
// Returns the |total_energy| value accumulated by the LogOfEnergy() calls.
int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
                                    const int16_t* data_in,
                                    size_t data_length,
                                    int16_t* features) {
  // Scratch buffers. With at most 240 input samples the first split yields
  // at most 120 samples per branch and the second at most 60. The buffers
  // are reused by the later (shorter) stages.
  int16_t hp_120[120], lp_120[120];
  int16_t hp_60[60], lp_60[60];

  int16_t total_energy = 0;

  // Sample counts after one, two, three and four downsampling stages.
  const size_t half_data_length = data_length >> 1;         // 2000 Hz wide.
  const size_t quarter_length = half_data_length >> 1;      // 1000 Hz wide.
  const size_t eighth_length = quarter_length >> 1;         // 500 Hz wide.
  const size_t sixteenth_length = eighth_length >> 1;       // 250 Hz wide.

  RTC_DCHECK_LE(data_length, 240);
  // The highest filter state index used below is 4.
  RTC_DCHECK_LT(4, kNumChannels - 1);

  // Stage 0: [0 - 4000] Hz -> [2000 - 4000] Hz (hp_120), [0 - 2000] Hz
  // (lp_120).
  SplitFilter(data_in, data_length, &self->upper_state[0],
              &self->lower_state[0], hp_120, lp_120);

  // Stage 1: [2000 - 4000] Hz -> [3000 - 4000] Hz (hp_60), [2000 - 3000] Hz
  // (lp_60).
  SplitFilter(hp_120, half_data_length, &self->upper_state[1],
              &self->lower_state[1], hp_60, lp_60);
  // Energy in 3000 Hz - 4000 Hz.
  LogOfEnergy(hp_60, quarter_length, kOffsetVector[5], &total_energy,
              &features[5]);
  // Energy in 2000 Hz - 3000 Hz.
  LogOfEnergy(lp_60, quarter_length, kOffsetVector[4], &total_energy,
              &features[4]);

  // Stage 2: [0 - 2000] Hz -> [1000 - 2000] Hz (hp_60), [0 - 1000] Hz
  // (lp_60).
  SplitFilter(lp_120, half_data_length, &self->upper_state[2],
              &self->lower_state[2], hp_60, lp_60);
  // Energy in 1000 Hz - 2000 Hz.
  LogOfEnergy(hp_60, quarter_length, kOffsetVector[3], &total_energy,
              &features[3]);

  // Stage 3: [0 - 1000] Hz -> [500 - 1000] Hz (hp_120), [0 - 500] Hz
  // (lp_120).
  SplitFilter(lp_60, quarter_length, &self->upper_state[3],
              &self->lower_state[3], hp_120, lp_120);
  // Energy in 500 Hz - 1000 Hz.
  LogOfEnergy(hp_120, eighth_length, kOffsetVector[2], &total_energy,
              &features[2]);

  // Stage 4: [0 - 500] Hz -> [250 - 500] Hz (hp_60), [0 - 250] Hz (lp_60).
  SplitFilter(lp_120, eighth_length, &self->upper_state[4],
              &self->lower_state[4], hp_60, lp_60);
  // Energy in 250 Hz - 500 Hz.
  LogOfEnergy(hp_60, sixteenth_length, kOffsetVector[1], &total_energy,
              &features[1]);

  // Remove 0 Hz - 80 Hz by high pass filtering the lowest band; the result
  // is written to hp_120.
  HighPassFilter(lp_60, sixteenth_length, self->hp_filter_state, hp_120);
  // Energy in 80 Hz - 250 Hz.
  LogOfEnergy(hp_120, sixteenth_length, kOffsetVector[0], &total_energy,
              &features[0]);

  return total_energy;
}
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment