Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
9dcc7a15
Commit
9dcc7a15
authored
Apr 25, 2022
by
flyingdown
Browse files
init v0.10.0
parent
db2b0b79
Pipeline
#254
failed with stages
in 0 seconds
Changes
416
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2822 additions
and
0 deletions
+2822
-0
test/torchaudio_unittest/functional/librosa_compatibility_cuda_test.py
...io_unittest/functional/librosa_compatibility_cuda_test.py
+12
-0
test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py
...io_unittest/functional/librosa_compatibility_test_impl.py
+161
-0
test/torchaudio_unittest/functional/sox_compatibility_test.py
.../torchaudio_unittest/functional/sox_compatibility_test.py
+299
-0
test/torchaudio_unittest/functional/torchscript_consistency_cpu_test.py
...o_unittest/functional/torchscript_consistency_cpu_test.py
+14
-0
test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py
..._unittest/functional/torchscript_consistency_cuda_test.py
+16
-0
test/torchaudio_unittest/functional/torchscript_consistency_impl.py
...audio_unittest/functional/torchscript_consistency_impl.py
+718
-0
test/torchaudio_unittest/kaldi_io_test.py
test/torchaudio_unittest/kaldi_io_test.py
+33
-0
test/torchaudio_unittest/models/__init__.py
test/torchaudio_unittest/models/__init__.py
+0
-0
test/torchaudio_unittest/models/models_test.py
test/torchaudio_unittest/models/models_test.py
+248
-0
test/torchaudio_unittest/models/tacotron2/__init__.py
test/torchaudio_unittest/models/tacotron2/__init__.py
+0
-0
test/torchaudio_unittest/models/tacotron2/model_test_cpu_test.py
...rchaudio_unittest/models/tacotron2/model_test_cpu_test.py
+23
-0
test/torchaudio_unittest/models/tacotron2/model_test_gpu_test.py
...rchaudio_unittest/models/tacotron2/model_test_gpu_test.py
+26
-0
test/torchaudio_unittest/models/tacotron2/model_test_impl.py
test/torchaudio_unittest/models/tacotron2/model_test_impl.py
+381
-0
test/torchaudio_unittest/models/wav2vec2/__init__.py
test/torchaudio_unittest/models/wav2vec2/__init__.py
+0
-0
test/torchaudio_unittest/models/wav2vec2/fairseq_integration_test.py
...udio_unittest/models/wav2vec2/fairseq_integration_test.py
+240
-0
test/torchaudio_unittest/models/wav2vec2/huggingface_intergration_test.py
...unittest/models/wav2vec2/huggingface_intergration_test.py
+224
-0
test/torchaudio_unittest/models/wav2vec2/model_test.py
test/torchaudio_unittest/models/wav2vec2/model_test.py
+243
-0
test/torchaudio_unittest/sox_effect/__init__.py
test/torchaudio_unittest/sox_effect/__init__.py
+0
-0
test/torchaudio_unittest/sox_effect/common.py
test/torchaudio_unittest/sox_effect/common.py
+26
-0
test/torchaudio_unittest/sox_effect/dataset_test.py
test/torchaudio_unittest/sox_effect/dataset_test.py
+158
-0
No files found.
Too many changes to show.
To preserve performance only
416 of 416+
files are displayed.
Plain diff
Email patch
test/torchaudio_unittest/functional/librosa_compatibility_cuda_test.py
0 → 100644
View file @
9dcc7a15
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
.librosa_compatibility_test_impl
import
Functional
,
FunctionalComplex
@skipIfNoCuda
class TestFunctionalCUDA(Functional, PytorchTestCase):
    """Run the librosa-compatibility `Functional` suite on a CUDA device."""
    device = 'cuda'
@skipIfNoCuda
class TestFunctionalComplexCUDA(FunctionalComplex, PytorchTestCase):
    """Run the librosa-compatibility `FunctionalComplex` suite on a CUDA device."""
    device = 'cuda'
test/torchaudio_unittest/functional/librosa_compatibility_test_impl.py
0 → 100644
View file @
9dcc7a15
import
unittest
from
distutils.version
import
StrictVersion
import
torch
from
parameterized
import
param
import
torchaudio.functional
as
F
from
torchaudio._internal.module_utils
import
is_module_available
LIBROSA_AVAILABLE
=
is_module_available
(
'librosa'
)
if
LIBROSA_AVAILABLE
:
import
numpy
as
np
import
librosa
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
nested_params
,
get_whitenoise
,
get_spectrogram
,
)
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class Functional(TestBaseMixin):
    """Test suite for functions in `functional` module.

    Each test compares a torchaudio implementation against the
    corresponding librosa reference on the same input.
    """
    dtype = torch.float64

    @nested_params([0, 0.99])
    def test_griffinlim(self, momentum):
        """griffinlim should match librosa.griffinlim on a whitenoise spectrogram."""
        # FFT params
        n_fft = 400
        win_length = n_fft
        hop_length = n_fft // 4
        window = torch.hann_window(win_length, device=self.device)
        power = 1
        # GriffinLim params
        n_iter = 8

        waveform = get_whitenoise(device=self.device, dtype=self.dtype)
        specgram = get_spectrogram(
            waveform,
            n_fft=n_fft,
            hop_length=hop_length,
            power=power,
            win_length=win_length,
            window=window,
        )

        result = F.griffinlim(
            specgram,
            window=window,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            power=power,
            n_iter=n_iter,
            momentum=momentum,
            length=waveform.size(1),
            rand_init=False,
        )
        expected = librosa.griffinlim(
            specgram[0].cpu().numpy(),
            n_iter=n_iter,
            hop_length=hop_length,
            momentum=momentum,
            init=None,
            length=waveform.size(1),
        )[None, ...]
        self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)

    @nested_params(
        [
            param(),
            param(n_mels=128, sample_rate=44100),
            param(n_mels=128, fmin=2000.0, fmax=5000.0),
            param(n_mels=56, fmin=100.0, fmax=9000.0),
            param(n_mels=56, fmin=800.0, fmax=900.0),
            param(n_mels=56, fmin=1900.0, fmax=900.0),
            param(n_mels=10, fmin=1900.0, fmax=900.0),
        ],
        [param(norm=n) for n in [None, 'slaney']],
        [param(mel_scale=s) for s in ['htk', 'slaney']],
    )
    def test_create_mel_fb(
            self,
            n_mels=40,
            sample_rate=22050,
            n_fft=2048,
            fmin=0.0,
            fmax=8000.0,
            norm=None,
            mel_scale="htk",
    ):
        """melscale_fbanks should match librosa.filters.mel (transposed)."""
        if (norm == "slaney" and StrictVersion(librosa.__version__) < StrictVersion("0.7.2")):
            self.skipTest('Test is known to fail with older versions of librosa.')
        # Filterbank creation is device-independent, so CPU coverage suffices.
        if self.device != 'cpu':
            self.skipTest('No need to run this test on CUDA')

        expected = librosa.filters.mel(
            sr=sample_rate,
            n_fft=n_fft,
            n_mels=n_mels,
            fmax=fmax,
            fmin=fmin,
            htk=mel_scale == "htk",
            norm=norm,
        ).T
        result = F.melscale_fbanks(
            sample_rate=sample_rate,
            n_mels=n_mels,
            f_max=fmax,
            f_min=fmin,
            n_freqs=(n_fft // 2 + 1),
            norm=norm,
            mel_scale=mel_scale,
        )
        self.assertEqual(result, torch.from_numpy(expected), atol=7e-5, rtol=1.3e-6)

    def test_amplitude_to_DB_power(self):
        """amplitude_to_DB with multiplier 10 should match librosa.power_to_db."""
        amin = 1e-10
        db_multiplier = 0.0
        top_db = 80.0
        # multiplier 10 corresponds to a power spectrogram.
        multiplier = 10.0

        spec = get_spectrogram(
            get_whitenoise(device=self.device, dtype=self.dtype), power=2)
        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
        expected = librosa.core.power_to_db(spec[0].cpu().numpy())[None, ...]
        self.assertEqual(result, torch.from_numpy(expected))

    def test_amplitude_to_DB(self):
        """amplitude_to_DB with multiplier 20 should match librosa.amplitude_to_db."""
        amin = 1e-10
        db_multiplier = 0.0
        top_db = 80.0
        # multiplier 20 corresponds to an amplitude (magnitude) spectrogram.
        multiplier = 20.0

        spec = get_spectrogram(
            get_whitenoise(device=self.device, dtype=self.dtype), power=1)
        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
        expected = librosa.core.amplitude_to_db(spec[0].cpu().numpy())[None, ...]
        self.assertEqual(result, torch.from_numpy(expected))
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class FunctionalComplex(TestBaseMixin):
    """Librosa-compatibility tests for complex-valued functional ops."""

    @nested_params(
        [0.5, 1.01, 1.3],
        [True, False],
    )
    def test_phase_vocoder(self, rate, test_pseudo_complex):
        """phase_vocoder should match librosa.phase_vocoder for several rates."""
        hop_length = 256
        num_freq = 1025
        num_frames = 400
        torch.random.manual_seed(42)

        # Due to cumulative sum, numerical error in using torch.float32 will
        # result in bottom-right values of the stretched spectrogram not
        # matching librosa, hence complex128 input.
        spec = torch.randn(
            num_freq, num_frames, device=self.device, dtype=torch.complex128)
        phase_advance = torch.linspace(
            0,
            np.pi * hop_length,
            num_freq,
            device=self.device,
            dtype=torch.float64,
        )[..., None]

        stretched = F.phase_vocoder(
            torch.view_as_real(spec) if test_pseudo_complex else spec,
            rate=rate,
            phase_advance=phase_advance,
        )

        expected_stretched = librosa.phase_vocoder(
            spec.cpu().numpy(), rate=rate, hop_length=hop_length)

        self.assertEqual(
            torch.view_as_complex(stretched) if test_pseudo_complex else stretched,
            torch.from_numpy(expected_stretched),
        )
test/torchaudio_unittest/functional/sox_compatibility_test.py
0 → 100644
View file @
9dcc7a15
import
torch
import
torchaudio.functional
as
F
from
torchaudio_unittest.common_utils
import
(
skipIfNoSox
,
skipIfNoExec
,
TempDirMixin
,
TorchaudioTestCase
,
get_asset_path
,
sox_utils
,
load_wav
,
save_wav
,
get_whitenoise
,
)
@skipIfNoSox
@skipIfNoExec('sox')
class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase):
    """Compare torchaudio filtering functionals against the `sox` command."""

    def run_sox_effect(self, input_file, effect):
        """Apply `effect` to `input_file` via sox and load the result."""
        output_file = self.get_temp_path('expected.wav')
        sox_utils.run_sox_effect(input_file, output_file, [str(e) for e in effect])
        return load_wav(output_file)

    def assert_sox_effect(self, result, input_path, effects, atol=1e-04, rtol=1e-5):
        """Assert `result` matches the sox output of `effects` on `input_path`."""
        expected, _ = self.run_sox_effect(input_path, effects)
        self.assertEqual(result, expected, atol=atol, rtol=rtol)

    def get_whitenoise(self, sample_rate=8000):
        """Generate whitenoise, save it as wav, and return (tensor, path)."""
        noise = get_whitenoise(
            sample_rate=sample_rate,
            duration=3,
            scale_factor=0.9,
        )
        path = self.get_temp_path("whitenoise.wav")
        save_wav(path, noise, sample_rate)
        return noise, path

    def test_gain(self):
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.gain(data, 3)
        self.assert_sox_effect(result, path, ['gain', 3])

    def test_dither(self):
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.dither(data)
        self.assert_sox_effect(result, path, ['dither'])

    def test_dither_noise(self):
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.dither(data, noise_shaping=True)
        self.assert_sox_effect(result, path, ['dither', '-s'], atol=1.5e-4)

    def test_lowpass(self):
        cutoff_freq = 3000
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.lowpass_biquad(data, sample_rate, cutoff_freq)
        self.assert_sox_effect(result, path, ['lowpass', cutoff_freq], atol=1.5e-4)

    def test_highpass(self):
        cutoff_freq = 2000
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.highpass_biquad(data, sample_rate, cutoff_freq)
        self.assert_sox_effect(result, path, ['highpass', cutoff_freq], atol=1.5e-4)

    def test_allpass(self):
        central_freq = 1000
        q = 0.707
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.allpass_biquad(data, sample_rate, central_freq, q)
        self.assert_sox_effect(result, path, ['allpass', central_freq, f'{q}q'])

    def test_bandpass_with_csg(self):
        central_freq = 1000
        q = 0.707
        const_skirt_gain = True
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.bandpass_biquad(data, sample_rate, central_freq, q, const_skirt_gain)
        # '-c' selects constant skirt gain in the sox bandpass effect.
        self.assert_sox_effect(result, path, ['bandpass', '-c', central_freq, f'{q}q'])

    def test_bandpass_without_csg(self):
        central_freq = 1000
        q = 0.707
        const_skirt_gain = False
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.bandpass_biquad(data, sample_rate, central_freq, q, const_skirt_gain)
        self.assert_sox_effect(result, path, ['bandpass', central_freq, f'{q}q'])

    def test_bandreject(self):
        central_freq = 1000
        q = 0.707
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.bandreject_biquad(data, sample_rate, central_freq, q)
        self.assert_sox_effect(result, path, ['bandreject', central_freq, f'{q}q'])

    def test_band_with_noise(self):
        central_freq = 1000
        q = 0.707
        noise = True
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.band_biquad(data, sample_rate, central_freq, q, noise)
        # '-n' selects the noise-mode band filter in sox.
        self.assert_sox_effect(result, path, ['band', '-n', central_freq, f'{q}q'])

    def test_band_without_noise(self):
        central_freq = 1000
        q = 0.707
        noise = False
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.band_biquad(data, sample_rate, central_freq, q, noise)
        self.assert_sox_effect(result, path, ['band', central_freq, f'{q}q'])

    def test_treble(self):
        central_freq = 1000
        q = 0.707
        gain = 40
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.treble_biquad(data, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(result, path, ['treble', gain, central_freq, f'{q}q'])

    def test_bass(self):
        central_freq = 1000
        q = 0.707
        gain = 40
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.bass_biquad(data, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(
            result, path, ['bass', gain, central_freq, f'{q}q'], atol=1.5e-4)

    def test_deemph(self):
        sample_rate = 44100
        data, path = self.get_whitenoise(sample_rate)
        result = F.deemph_biquad(data, sample_rate)
        self.assert_sox_effect(result, path, ['deemph'])

    def test_riaa(self):
        sample_rate = 44100
        data, path = self.get_whitenoise(sample_rate)
        result = F.riaa_biquad(data, sample_rate)
        self.assert_sox_effect(result, path, ['riaa'])

    def test_contrast(self):
        enhancement_amount = 80.
        data, path = self.get_whitenoise()
        result = F.contrast(data, enhancement_amount)
        self.assert_sox_effect(result, path, ['contrast', enhancement_amount])

    def test_dcshift_with_limiter(self):
        shift = 0.5
        limiter_gain = 0.05
        data, path = self.get_whitenoise()
        result = F.dcshift(data, shift, limiter_gain)
        self.assert_sox_effect(result, path, ['dcshift', shift, limiter_gain])

    def test_dcshift_without_limiter(self):
        shift = 0.6
        data, path = self.get_whitenoise()
        result = F.dcshift(data, shift)
        self.assert_sox_effect(result, path, ['dcshift', shift])

    def test_overdrive(self):
        gain = 30
        colour = 40
        data, path = self.get_whitenoise()
        result = F.overdrive(data, gain, colour)
        self.assert_sox_effect(result, path, ['overdrive', gain, colour])

    def test_phaser_sine(self):
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.phaser(
            data, sample_rate, gain_in, gain_out, delay_ms, decay, speed,
            sinusoidal=True)
        self.assert_sox_effect(
            result, path,
            ['phaser', gain_in, gain_out, delay_ms, decay, speed, '-s'])

    def test_phaser_triangle(self):
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.phaser(
            data, sample_rate, gain_in, gain_out, delay_ms, decay, speed,
            sinusoidal=False)
        self.assert_sox_effect(
            result, path,
            ['phaser', gain_in, gain_out, delay_ms, decay, speed, '-t'])

    def test_flanger_triangle_linear(self):
        delay = 0.6
        depth = 0.87
        regen = 3.0
        width = 0.9
        speed = 0.5
        phase = 30
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='linear')
        self.assert_sox_effect(
            result, path,
            ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'linear'])

    def test_flanger_triangle_quad(self):
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.4
        speed = 0.5
        phase = 40
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='quadratic')
        self.assert_sox_effect(
            result, path,
            ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'quadratic'])

    def test_flanger_sine_linear(self):
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.23
        speed = 1.3
        phase = 60
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='linear')
        self.assert_sox_effect(
            result, path,
            ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'linear'])

    def test_flanger_sine_quad(self):
        delay = 0.9
        depth = 0.9
        regen = 4.0
        width = 0.23
        speed = 1.3
        phase = 25
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='quadratic')
        self.assert_sox_effect(
            result, path,
            ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'quadratic'])

    def test_equalizer(self):
        center_freq = 300
        q = 0.707
        gain = 1
        sample_rate = 8000
        data, path = self.get_whitenoise(sample_rate)
        result = F.equalizer_biquad(data, sample_rate, center_freq, gain, q)
        self.assert_sox_effect(result, path, ['equalizer', center_freq, q, gain])

    def test_perf_biquad_filtering(self):
        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6
        data, path = self.get_whitenoise()
        result = F.lfilter(
            data, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2]))
        self.assert_sox_effect(result, path, ['biquad', b0, b1, b2, a0, a1, a2])
test/torchaudio_unittest/functional/torchscript_consistency_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.torchscript_consistency_impl
import
Functional
,
FunctionalFloat32Only
class TestFunctionalFloat32(Functional, FunctionalFloat32Only, PytorchTestCase):
    """Torchscript-consistency tests in float32 on CPU."""
    dtype = torch.float32
    device = torch.device('cpu')
class TestFunctionalFloat64(Functional, PytorchTestCase):
    """Torchscript-consistency tests in float64 on CPU."""
    dtype = torch.float64
    device = torch.device('cpu')
test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
skipIfNoCuda
,
PytorchTestCase
from
.torchscript_consistency_impl
import
Functional
,
FunctionalFloat32Only
@skipIfNoCuda
class TestFunctionalFloat32(Functional, FunctionalFloat32Only, PytorchTestCase):
    """Torchscript-consistency tests in float32 on CUDA."""
    dtype = torch.float32
    device = torch.device('cuda')
@skipIfNoCuda
class TestFunctionalFloat64(Functional, PytorchTestCase):
    """Torchscript-consistency tests in float64 on CUDA."""
    dtype = torch.float64
    device = torch.device('cuda')
test/torchaudio_unittest/functional/torchscript_consistency_impl.py
0 → 100644
View file @
9dcc7a15
"""Test suites for jit-ability and its numerical compatibility"""
import
unittest
import
torch
import
torchaudio.functional
as
F
from
parameterized
import
parameterized
from
torchaudio_unittest
import
common_utils
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
TestBaseMixin
,
skipIfRocm
,
torch_script
,
)
class
Functional
(
TempDirMixin
,
TestBaseMixin
):
"""Implements test for `functional` module that are performed for different devices"""
def
_assert_consistency
(
self
,
func
,
tensor
,
shape_only
=
False
):
tensor
=
tensor
.
to
(
device
=
self
.
device
,
dtype
=
self
.
dtype
)
ts_func
=
torch_script
(
func
)
torch
.
random
.
manual_seed
(
40
)
output
=
func
(
tensor
)
torch
.
random
.
manual_seed
(
40
)
ts_output
=
ts_func
(
tensor
)
if
shape_only
:
ts_output
=
ts_output
.
shape
output
=
output
.
shape
self
.
assertEqual
(
ts_output
,
output
)
def
_assert_consistency_complex
(
self
,
func
,
tensor
,
test_pseudo_complex
=
False
):
assert
tensor
.
is_complex
()
tensor
=
tensor
.
to
(
device
=
self
.
device
,
dtype
=
self
.
complex_dtype
)
ts_func
=
torch_script
(
func
)
if
test_pseudo_complex
:
tensor
=
torch
.
view_as_real
(
tensor
)
torch
.
random
.
manual_seed
(
40
)
output
=
func
(
tensor
)
torch
.
random
.
manual_seed
(
40
)
ts_output
=
ts_func
(
tensor
)
self
.
assertEqual
(
ts_output
,
output
)
def
test_spectrogram_complex
(
self
):
def
func
(
tensor
):
n_fft
=
400
ws
=
400
hop
=
200
pad
=
0
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
power
=
None
normalize
=
False
return
F
.
spectrogram
(
tensor
,
pad
,
window
,
n_fft
,
hop
,
ws
,
power
,
normalize
)
tensor
=
common_utils
.
get_whitenoise
()
self
.
_assert_consistency
(
func
,
tensor
)
def
test_spectrogram_real
(
self
):
def
func
(
tensor
):
n_fft
=
400
ws
=
400
hop
=
200
pad
=
0
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
power
=
2.
normalize
=
False
return
F
.
spectrogram
(
tensor
,
pad
,
window
,
n_fft
,
hop
,
ws
,
power
,
normalize
,
return_complex
=
False
)
tensor
=
common_utils
.
get_whitenoise
()
self
.
_assert_consistency
(
func
,
tensor
)
def
test_inverse_spectrogram_complex
(
self
):
def
func
(
tensor
):
length
=
400
n_fft
=
400
hop
=
200
ws
=
400
pad
=
0
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
torch
.
float64
)
normalize
=
False
return
F
.
inverse_spectrogram
(
tensor
,
length
,
pad
,
window
,
n_fft
,
hop
,
ws
,
normalize
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
duration
=
0.05
)
tensor
=
common_utils
.
get_spectrogram
(
waveform
,
n_fft
=
400
,
hop_length
=
200
)
self
.
_assert_consistency_complex
(
func
,
tensor
)
def
test_inverse_spectrogram_real
(
self
):
def
func
(
tensor
):
length
=
400
n_fft
=
400
hop
=
200
ws
=
400
pad
=
0
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
normalize
=
False
return
F
.
inverse_spectrogram
(
tensor
,
length
,
pad
,
window
,
n_fft
,
hop
,
ws
,
normalize
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
duration
=
0.05
)
tensor
=
common_utils
.
get_spectrogram
(
waveform
,
n_fft
=
400
,
hop_length
=
200
)
tensor
=
torch
.
view_as_real
(
tensor
)
self
.
_assert_consistency
(
func
,
tensor
)
@
skipIfRocm
def
test_griffinlim
(
self
):
def
func
(
tensor
):
n_fft
=
400
ws
=
400
hop
=
200
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
power
=
2.
momentum
=
0.99
n_iter
=
32
length
=
1000
rand_int
=
False
return
F
.
griffinlim
(
tensor
,
window
,
n_fft
,
hop
,
ws
,
power
,
n_iter
,
momentum
,
length
,
rand_int
)
tensor
=
torch
.
rand
((
1
,
201
,
6
))
self
.
_assert_consistency
(
func
,
tensor
)
def
test_compute_deltas
(
self
):
def
func
(
tensor
):
win_length
=
2
*
7
+
1
return
F
.
compute_deltas
(
tensor
,
win_length
=
win_length
)
channel
=
13
n_mfcc
=
channel
*
3
time
=
1021
tensor
=
torch
.
randn
(
channel
,
n_mfcc
,
time
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_detect_pitch_frequency
(
self
):
waveform
=
common_utils
.
get_sinusoid
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
return
F
.
detect_pitch_frequency
(
tensor
,
sample_rate
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_melscale_fbanks
(
self
):
if
self
.
device
!=
torch
.
device
(
'cpu'
):
raise
unittest
.
SkipTest
(
'No need to perform test on device other than CPU'
)
def
func
(
_
):
n_stft
=
100
f_min
=
0.0
f_max
=
20.0
n_mels
=
10
sample_rate
=
16000
norm
=
"slaney"
return
F
.
melscale_fbanks
(
n_stft
,
f_min
,
f_max
,
n_mels
,
sample_rate
,
norm
)
dummy
=
torch
.
zeros
(
1
,
1
)
self
.
_assert_consistency
(
func
,
dummy
)
def
test_linear_fbanks
(
self
):
if
self
.
device
!=
torch
.
device
(
'cpu'
):
raise
unittest
.
SkipTest
(
'No need to perform test on device other than CPU'
)
def
func
(
_
):
n_stft
=
100
f_min
=
0.0
f_max
=
20.0
n_filter
=
10
sample_rate
=
16000
return
F
.
linear_fbanks
(
n_stft
,
f_min
,
f_max
,
n_filter
,
sample_rate
)
dummy
=
torch
.
zeros
(
1
,
1
)
self
.
_assert_consistency
(
func
,
dummy
)
def
test_amplitude_to_DB
(
self
):
def
func
(
tensor
):
multiplier
=
10.0
amin
=
1e-10
db_multiplier
=
0.0
top_db
=
80.0
return
F
.
amplitude_to_DB
(
tensor
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
tensor
=
torch
.
rand
((
6
,
201
))
self
.
_assert_consistency
(
func
,
tensor
)
def
test_DB_to_amplitude
(
self
):
def
func
(
tensor
):
ref
=
1.
power
=
1.
return
F
.
DB_to_amplitude
(
tensor
,
ref
,
power
)
tensor
=
torch
.
rand
((
1
,
100
))
self
.
_assert_consistency
(
func
,
tensor
)
def
test_create_dct
(
self
):
if
self
.
device
!=
torch
.
device
(
'cpu'
):
raise
unittest
.
SkipTest
(
'No need to perform test on device other than CPU'
)
def
func
(
_
):
n_mfcc
=
40
n_mels
=
128
norm
=
"ortho"
return
F
.
create_dct
(
n_mfcc
,
n_mels
,
norm
)
dummy
=
torch
.
zeros
(
1
,
1
)
self
.
_assert_consistency
(
func
,
dummy
)
def
test_mu_law_encoding
(
self
):
def
func
(
tensor
):
qc
=
256
return
F
.
mu_law_encoding
(
tensor
,
qc
)
waveform
=
common_utils
.
get_whitenoise
()
self
.
_assert_consistency
(
func
,
waveform
)
def
test_mu_law_decoding
(
self
):
def
func
(
tensor
):
qc
=
256
return
F
.
mu_law_decoding
(
tensor
,
qc
)
tensor
=
torch
.
rand
((
1
,
10
))
self
.
_assert_consistency
(
func
,
tensor
)
def
test_complex_norm
(
self
):
def
func
(
tensor
):
power
=
2.
return
F
.
complex_norm
(
tensor
,
power
)
tensor
=
torch
.
randn
(
1
,
2
,
1025
,
400
,
2
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_mask_along_axis
(
self
):
def
func
(
tensor
):
mask_param
=
100
mask_value
=
30.
axis
=
2
return
F
.
mask_along_axis
(
tensor
,
mask_param
,
mask_value
,
axis
)
tensor
=
torch
.
randn
(
2
,
1025
,
400
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_mask_along_axis_iid
(
self
):
def
func
(
tensor
):
mask_param
=
100
mask_value
=
30.
axis
=
2
return
F
.
mask_along_axis_iid
(
tensor
,
mask_param
,
mask_value
,
axis
)
tensor
=
torch
.
randn
(
4
,
2
,
1025
,
400
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_gain
(
self
):
def
func
(
tensor
):
gainDB
=
2.0
return
F
.
gain
(
tensor
,
gainDB
)
tensor
=
torch
.
rand
((
1
,
1000
))
self
.
_assert_consistency
(
func
,
tensor
)
def
test_dither_TPDF
(
self
):
def
func
(
tensor
):
return
F
.
dither
(
tensor
,
'TPDF'
)
tensor
=
common_utils
.
get_whitenoise
(
n_channels
=
2
)
self
.
_assert_consistency
(
func
,
tensor
,
shape_only
=
True
)
def
test_dither_RPDF
(
self
):
def
func
(
tensor
):
return
F
.
dither
(
tensor
,
'RPDF'
)
tensor
=
common_utils
.
get_whitenoise
(
n_channels
=
2
)
self
.
_assert_consistency
(
func
,
tensor
,
shape_only
=
True
)
def
test_dither_GPDF
(
self
):
def
func
(
tensor
):
return
F
.
dither
(
tensor
,
'GPDF'
)
tensor
=
common_utils
.
get_whitenoise
(
n_channels
=
2
)
self
.
_assert_consistency
(
func
,
tensor
,
shape_only
=
True
)
def
test_dither_noise_shaping
(
self
):
def
func
(
tensor
):
return
F
.
dither
(
tensor
,
noise_shaping
=
True
)
tensor
=
common_utils
.
get_whitenoise
(
n_channels
=
2
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_lfilter
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
()
def
func
(
tensor
):
# Design an IIR lowpass filter using scipy.signal filter design
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirdesign.html#scipy.signal.iirdesign
#
# Example
# >>> from scipy.signal import iirdesign
# >>> b, a = iirdesign(0.2, 0.3, 1, 60)
b_coeffs
=
torch
.
tensor
(
[
0.00299893
,
-
0.0051152
,
0.00841964
,
-
0.00747802
,
0.00841964
,
-
0.0051152
,
0.00299893
,
],
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
,
)
a_coeffs
=
torch
.
tensor
(
[
1.0
,
-
4.8155751
,
10.2217618
,
-
12.14481273
,
8.49018171
,
-
3.3066882
,
0.56088705
,
],
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
,
)
return
F
.
lfilter
(
tensor
,
a_coeffs
,
b_coeffs
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_filtfilt
(
self
):
def
func
(
tensor
):
torch
.
manual_seed
(
296
)
b_coeffs
=
torch
.
rand
(
4
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
a_coeffs
=
torch
.
rand
(
4
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
return
F
.
filtfilt
(
tensor
,
a_coeffs
,
b_coeffs
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_lowpass
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
cutoff_freq
=
3000.
return
F
.
lowpass_biquad
(
tensor
,
sample_rate
,
cutoff_freq
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_highpass
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
cutoff_freq
=
2000.
return
F
.
highpass_biquad
(
tensor
,
sample_rate
,
cutoff_freq
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_allpass
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
return
F
.
allpass_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_bandpass_with_csg
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
const_skirt_gain
=
True
return
F
.
bandpass_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
,
const_skirt_gain
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_bandpass_without_csg
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
const_skirt_gain
=
True
return
F
.
bandpass_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
,
const_skirt_gain
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_bandreject
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
return
F
.
bandreject_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_band_with_noise
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
noise
=
True
return
F
.
band_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
,
noise
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_band_without_noise
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
central_freq
=
1000.
q
=
0.707
noise
=
False
return
F
.
band_biquad
(
tensor
,
sample_rate
,
central_freq
,
q
,
noise
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_treble
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
gain
=
40.
central_freq
=
1000.
q
=
0.707
return
F
.
treble_biquad
(
tensor
,
sample_rate
,
gain
,
central_freq
,
q
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_bass
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
gain
=
40.
central_freq
=
1000.
q
=
0.707
return
F
.
bass_biquad
(
tensor
,
sample_rate
,
gain
,
central_freq
,
q
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_deemph
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
return
F
.
deemph_biquad
(
tensor
,
sample_rate
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_riaa
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
return
F
.
riaa_biquad
(
tensor
,
sample_rate
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_equalizer
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
sample_rate
=
44100
center_freq
=
300.
gain
=
1.
q
=
0.707
return
F
.
equalizer_biquad
(
tensor
,
sample_rate
,
center_freq
,
gain
,
q
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_perf_biquad_filtering
(
self
):
if
self
.
dtype
==
torch
.
float64
:
raise
unittest
.
SkipTest
(
"This test is known to fail for float64"
)
waveform
=
common_utils
.
get_whitenoise
()
def
func
(
tensor
):
a
=
torch
.
tensor
([
0.7
,
0.2
,
0.6
],
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
b
=
torch
.
tensor
([
0.4
,
0.2
,
0.9
],
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
return
F
.
lfilter
(
tensor
,
a
,
b
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_sliding_window_cmn
(
self
):
def
func
(
tensor
):
cmn_window
=
600
min_cmn_window
=
100
center
=
False
norm_vars
=
False
a
=
torch
.
tensor
(
[
[
-
1.915875792503357
,
1.147700309753418
],
[
1.8242558240890503
,
1.3869990110397339
]
],
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
return
F
.
sliding_window_cmn
(
a
,
cmn_window
,
min_cmn_window
,
center
,
norm_vars
)
b
=
torch
.
tensor
(
[
[
-
1.8701
,
-
0.1196
],
[
1.8701
,
0.1196
]
]
)
self
.
_assert_consistency
(
func
,
b
)
def
test_contrast
(
self
):
waveform
=
common_utils
.
get_whitenoise
()
def
func
(
tensor
):
enhancement_amount
=
80.
return
F
.
contrast
(
tensor
,
enhancement_amount
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_dcshift
(
self
):
waveform
=
common_utils
.
get_whitenoise
()
def
func
(
tensor
):
shift
=
0.5
limiter_gain
=
0.05
return
F
.
dcshift
(
tensor
,
shift
,
limiter_gain
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_overdrive
(
self
):
waveform
=
common_utils
.
get_whitenoise
()
def
func
(
tensor
):
gain
=
30.
colour
=
50.
return
F
.
overdrive
(
tensor
,
gain
,
colour
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_phaser
(
self
):
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
def
func
(
tensor
):
gain_in
=
0.5
gain_out
=
0.8
delay_ms
=
2.0
decay
=
0.4
speed
=
0.5
sample_rate
=
44100
return
F
.
phaser
(
tensor
,
sample_rate
,
gain_in
,
gain_out
,
delay_ms
,
decay
,
speed
,
sinusoidal
=
True
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_flanger
(
self
):
torch
.
random
.
manual_seed
(
40
)
waveform
=
torch
.
rand
(
2
,
100
)
-
0.5
def
func
(
tensor
):
delay
=
0.8
depth
=
0.88
regen
=
3.0
width
=
0.23
speed
=
1.3
phase
=
60.
sample_rate
=
44100
return
F
.
flanger
(
tensor
,
sample_rate
,
delay
,
depth
,
regen
,
width
,
speed
,
phase
,
modulation
=
'sinusoidal'
,
interpolation
=
'linear'
)
self
.
_assert_consistency
(
func
,
waveform
)
def
test_spectral_centroid
(
self
):
def
func
(
tensor
):
sample_rate
=
44100
n_fft
=
400
ws
=
400
hop
=
200
pad
=
0
window
=
torch
.
hann_window
(
ws
,
device
=
tensor
.
device
,
dtype
=
tensor
.
dtype
)
return
F
.
spectral_centroid
(
tensor
,
sample_rate
,
pad
,
window
,
n_fft
,
hop
,
ws
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
self
.
_assert_consistency
(
func
,
tensor
)
@
common_utils
.
skipIfNoKaldi
def
test_compute_kaldi_pitch
(
self
):
if
self
.
dtype
!=
torch
.
float32
or
self
.
device
!=
torch
.
device
(
'cpu'
):
raise
unittest
.
SkipTest
(
"Only float32, cpu is supported."
)
def
func
(
tensor
):
sample_rate
:
float
=
44100.
return
F
.
compute_kaldi_pitch
(
tensor
,
sample_rate
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
44100
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_resample_sinc
(
self
):
def
func
(
tensor
):
sr1
,
sr2
=
16000
,
8000
return
F
.
resample
(
tensor
,
sr1
,
sr2
,
resampling_method
=
"sinc_interpolation"
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
16000
)
self
.
_assert_consistency
(
func
,
tensor
)
def
test_resample_kaiser
(
self
):
def
func
(
tensor
):
sr1
,
sr2
=
16000
,
8000
return
F
.
resample
(
tensor
,
sr1
,
sr2
,
resampling_method
=
"kaiser_window"
)
def
func_beta
(
tensor
):
sr1
,
sr2
=
16000
,
8000
beta
=
6.
return
F
.
resample
(
tensor
,
sr1
,
sr2
,
resampling_method
=
"kaiser_window"
,
beta
=
beta
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
16000
)
self
.
_assert_consistency
(
func
,
tensor
)
self
.
_assert_consistency
(
func_beta
,
tensor
)
@
parameterized
.
expand
([(
True
,
),
(
False
,
)])
def
test_phase_vocoder
(
self
,
test_paseudo_complex
):
def
func
(
tensor
):
is_complex
=
tensor
.
is_complex
()
n_freq
=
tensor
.
size
(
-
2
if
is_complex
else
-
3
)
rate
=
0.5
hop_length
=
256
phase_advance
=
torch
.
linspace
(
0
,
3.14
*
hop_length
,
n_freq
,
dtype
=
(
torch
.
real
(
tensor
)
if
is_complex
else
tensor
).
dtype
,
device
=
tensor
.
device
,
)[...,
None
]
return
F
.
phase_vocoder
(
tensor
,
rate
,
phase_advance
)
tensor
=
torch
.
view_as_complex
(
torch
.
randn
(
2
,
1025
,
400
,
2
))
self
.
_assert_consistency_complex
(
func
,
tensor
,
test_paseudo_complex
)
class FunctionalFloat32Only(TestBaseMixin):
    """Consistency tests for functionals that only support float32."""

    def test_rnnt_loss(self):
        """TorchScript consistency of F.rnnt_loss on a tiny fixed logit tensor."""
        def func(tensor):
            targets = torch.tensor([[1, 2]], device=tensor.device, dtype=torch.int32)
            logit_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32)
            target_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32)
            return F.rnnt_loss(tensor, targets, logit_lengths, target_lengths)

        logits = torch.tensor([[[[0.1, 0.6, 0.1, 0.1, 0.1],
                                 [0.1, 0.1, 0.6, 0.1, 0.1],
                                 [0.1, 0.1, 0.2, 0.8, 0.1]],
                                [[0.1, 0.6, 0.1, 0.1, 0.1],
                                 [0.1, 0.1, 0.2, 0.1, 0.1],
                                 [0.7, 0.1, 0.2, 0.1, 0.1]]]])
        self._assert_consistency(func, logits.to(device=self.device, dtype=torch.float32))
test/torchaudio_unittest/kaldi_io_test.py
0 → 100644
View file @
9dcc7a15
import
torch
import
torchaudio.kaldi_io
as
kio
from
torchaudio_unittest
import
common_utils
class Test_KaldiIO(common_utils.TorchaudioTestCase):
    # Reference matrices stored in the asset ark files (keys "key1", "key2", ...).
    data1 = [[1, 2, 3], [11, 12, 13], [21, 22, 23]]
    data2 = [[31, 32, 33], [41, 42, 43], [51, 52, 53]]

    def _test_helper(self, file_name, expected_data, fn, expected_dtype):
        """ Takes a file_name to the input data and a function fn to extract the
        data. It compares the extracted data to the expected_data. The expected_dtype
        will be used to check that the extracted data is of the right type.
        """
        test_filepath = common_utils.get_asset_path(file_name)
        expected_output = {
            'key' + str(idx + 1): torch.tensor(val, dtype=expected_dtype)
            for idx, val in enumerate(expected_data)
        }
        for key, vec in fn(test_filepath):
            self.assertTrue(key in expected_output)
            self.assertTrue(isinstance(vec, torch.Tensor))
            self.assertEqual(vec.dtype, expected_dtype)
            self.assertTrue(torch.all(torch.eq(vec, expected_output[key])))

    def test_read_vec_int_ark(self):
        """Integer vectors round-trip from an ark file with dtype int32."""
        self._test_helper("vec_int.ark", self.data1, kio.read_vec_int_ark, torch.int32)

    def test_read_vec_flt_ark(self):
        """Float vectors round-trip from an ark file with dtype float32."""
        self._test_helper("vec_flt.ark", self.data1, kio.read_vec_flt_ark, torch.float32)

    def test_read_mat_ark(self):
        """Matrices round-trip from an ark file with dtype float32."""
        self._test_helper("mat.ark", [self.data1, self.data2], kio.read_mat_ark, torch.float32)
test/torchaudio_unittest/models/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/models/models_test.py
0 → 100644
View file @
9dcc7a15
import
itertools
from
collections
import
namedtuple
import
torch
from
parameterized
import
parameterized
from
torchaudio.models
import
ConvTasNet
,
DeepSpeech
,
Wav2Letter
,
WaveRNN
from
torchaudio.models.wavernn
import
MelResNet
,
UpsampleNetwork
from
torchaudio_unittest
import
common_utils
from
torchaudio_unittest.common_utils
import
torch_script
class TestWav2Letter(common_utils.TorchaudioTestCase):
    def test_waveform(self):
        """Wav2Letter on raw waveform input yields (batch, classes, time) output."""
        batch_size, num_features, num_classes, input_length = 2, 1, 40, 320

        model = Wav2Letter(num_classes=num_classes, num_features=num_features)
        out = model(torch.rand(batch_size, num_features, input_length))

        assert out.size() == (batch_size, num_classes, 2)

    def test_mfcc(self):
        """Wav2Letter on MFCC input yields (batch, classes, time) output."""
        batch_size, num_features, num_classes, input_length = 2, 13, 40, 2

        model = Wav2Letter(num_classes=num_classes, input_type="mfcc", num_features=num_features)
        out = model(torch.rand(batch_size, num_features, input_length))

        assert out.size() == (batch_size, num_classes, 2)
class TestMelResNet(common_utils.TorchaudioTestCase):
    def test_waveform(self):
        """Validate the output dimensions of a MelResNet block.
        """
        n_batch, n_time, n_freq = 2, 200, 100
        n_output, n_res_block, n_hidden, kernel_size = 128, 10, 128, 5

        model = MelResNet(n_res_block, n_freq, n_hidden, n_output, kernel_size)
        out = model(torch.rand(n_batch, n_freq, n_time))

        # valid convolution shrinks the time axis by kernel_size - 1
        assert out.size() == (n_batch, n_output, n_time - kernel_size + 1)
class TestUpsampleNetwork(common_utils.TorchaudioTestCase):
    def test_waveform(self):
        """Validate the output dimensions of a UpsampleNetwork block.
        """
        upsample_scales = [5, 5, 8]
        n_batch, n_time, n_freq = 2, 200, 100
        n_output, n_res_block, n_hidden, kernel_size = 256, 10, 128, 5

        total_scale = 1
        for scale in upsample_scales:
            total_scale *= scale

        model = UpsampleNetwork(upsample_scales, n_res_block, n_freq,
                                n_hidden, n_output, kernel_size)
        out1, out2 = model(torch.rand(n_batch, n_freq, n_time))

        n_out_time = total_scale * (n_time - kernel_size + 1)
        assert out1.size() == (n_batch, n_freq, n_out_time)
        assert out2.size() == (n_batch, n_output, n_out_time)
class TestWaveRNN(common_utils.TorchaudioTestCase):
    def test_waveform(self):
        """Validate the output dimensions of a WaveRNN model.
        """
        upsample_scales = [5, 5, 8]
        n_rnn = n_fc = n_classes = 512
        hop_length = 200
        n_batch, n_time, n_freq = 2, 200, 100
        n_output, n_res_block, n_hidden, kernel_size = 256, 10, 128, 5

        model = WaveRNN(upsample_scales, n_classes, hop_length, n_res_block,
                        n_rnn, n_fc, kernel_size, n_freq, n_hidden, n_output)

        n_samples = hop_length * (n_time - kernel_size + 1)
        x = torch.rand(n_batch, 1, n_samples)
        mels = torch.rand(n_batch, 1, n_freq, n_time)
        out = model(x, mels)

        assert out.size() == (n_batch, 1, n_samples, n_classes)

    def test_infer_waveform(self):
        """Validate the output dimensions of a WaveRNN model's infer method.
        """
        upsample_scales = [5, 5, 8]
        n_rnn = n_fc = n_classes = 128
        hop_length = 200
        n_batch, n_time, n_freq = 2, 50, 25
        n_output, n_res_block, n_hidden, kernel_size = 64, 2, 32, 5

        model = WaveRNN(upsample_scales, n_classes, hop_length, n_res_block,
                        n_rnn, n_fc, kernel_size, n_freq, n_hidden, n_output)

        lengths = torch.tensor([n_time, n_time // 2])
        out, waveform_lengths = model.infer(torch.rand(n_batch, n_freq, n_time), lengths)

        assert out.size() == (n_batch, 1, hop_length * n_time)
        assert waveform_lengths[0] == hop_length * n_time
        assert waveform_lengths[1] == hop_length * n_time // 2

    def test_torchscript_infer(self):
        """Scripted model outputs the same as eager mode"""
        upsample_scales = [5, 5, 8]
        n_rnn = n_fc = n_classes = 128
        hop_length = 200
        n_batch, n_time, n_freq = 2, 50, 25
        n_output, n_res_block, n_hidden, kernel_size = 64, 2, 32, 5

        model = WaveRNN(upsample_scales, n_classes, hop_length, n_res_block,
                        n_rnn, n_fc, kernel_size, n_freq, n_hidden, n_output)
        model.eval()
        x = torch.rand(n_batch, n_freq, n_time)

        # same seed for both runs so the stochastic sampling matches
        torch.random.manual_seed(0)
        out_eager = model.infer(x)

        torch.random.manual_seed(0)
        out_script = torch_script(model).infer(x)

        self.assertEqual(out_eager, out_script)
_ConvTasNetParams
=
namedtuple
(
'_ConvTasNetParams'
,
[
'enc_num_feats'
,
'enc_kernel_size'
,
'msk_num_feats'
,
'msk_num_hidden_feats'
,
'msk_kernel_size'
,
'msk_num_layers'
,
'msk_num_stacks'
,
]
)
class TestConvTasNet(common_utils.TorchaudioTestCase):
    # Cross product of source counts and the paper's hyper-parameter grid.
    # NOTE(review): some rows repeat verbatim — presumably distinct paper rows
    # collapsed onto the same subset of parameters; preserved as-is.
    @parameterized.expand(list(itertools.product(
        [2, 3],
        [
            _ConvTasNetParams(128, 40, 128, 256, 3, 7, 2),
            _ConvTasNetParams(256, 40, 128, 256, 3, 7, 2),
            _ConvTasNetParams(512, 40, 128, 256, 3, 7, 2),
            _ConvTasNetParams(512, 40, 128, 256, 3, 7, 2),
            _ConvTasNetParams(512, 40, 128, 512, 3, 7, 2),
            _ConvTasNetParams(512, 40, 128, 512, 3, 7, 2),
            _ConvTasNetParams(512, 40, 256, 256, 3, 7, 2),
            _ConvTasNetParams(512, 40, 256, 512, 3, 7, 2),
            _ConvTasNetParams(512, 40, 256, 512, 3, 7, 2),
            _ConvTasNetParams(512, 40, 128, 512, 3, 6, 4),
            _ConvTasNetParams(512, 40, 128, 512, 3, 4, 6),
            _ConvTasNetParams(512, 40, 128, 512, 3, 8, 3),
            _ConvTasNetParams(512, 32, 128, 512, 3, 8, 3),
            _ConvTasNetParams(512, 16, 128, 512, 3, 8, 3),
        ],
    )))
    def test_paper_configuration(self, num_sources, model_params):
        """ConvTasNet model works on the valid configurations in the paper"""
        batch_size, num_frames = 32, 8000

        model = ConvTasNet(
            num_sources=num_sources,
            enc_kernel_size=model_params.enc_kernel_size,
            enc_num_feats=model_params.enc_num_feats,
            msk_kernel_size=model_params.msk_kernel_size,
            msk_num_feats=model_params.msk_num_feats,
            msk_num_hidden_feats=model_params.msk_num_hidden_feats,
            msk_num_layers=model_params.msk_num_layers,
            msk_num_stacks=model_params.msk_num_stacks,
        )
        output = model(torch.rand(batch_size, 1, num_frames))

        assert output.shape == (batch_size, num_sources, num_frames)
class TestDeepSpeech(common_utils.TorchaudioTestCase):
    def test_deepspeech(self):
        """DeepSpeech forward output has shape (batch, time, classes)."""
        n_batch, n_feature, n_channel, n_class, n_time = 2, 1, 1, 40, 320

        model = DeepSpeech(n_feature=n_feature, n_class=n_class)
        out = model(torch.rand(n_batch, n_channel, n_time, n_feature))

        assert out.size() == (n_batch, n_time, n_class)
test/torchaudio_unittest/models/tacotron2/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/models/tacotron2/model_test_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.model_test_impl
import
(
Tacotron2EncoderTests
,
Tacotron2DecoderTests
,
Tacotron2Tests
,
)
class TestTacotron2EncoderFloat32CPU(Tacotron2EncoderTests, PytorchTestCase):
    """Run the shared Tacotron2 encoder tests with float32 tensors on CPU."""
    dtype = torch.float32
    device = torch.device("cpu")
class TestTacotron2DecoderFloat32CPU(Tacotron2DecoderTests, PytorchTestCase):
    """Run the shared Tacotron2 decoder tests with float32 tensors on CPU."""
    dtype = torch.float32
    device = torch.device("cpu")
class TestTacotron2Float32CPU(Tacotron2Tests, PytorchTestCase):
    """Run the shared full-model Tacotron2 tests with float32 tensors on CPU."""
    dtype = torch.float32
    device = torch.device("cpu")
test/torchaudio_unittest/models/tacotron2/model_test_gpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
skipIfNoCuda
,
PytorchTestCase
from
.model_test_impl
import
(
Tacotron2EncoderTests
,
Tacotron2DecoderTests
,
Tacotron2Tests
,
)
@skipIfNoCuda
class TestTacotron2EncoderFloat32CUDA(Tacotron2EncoderTests, PytorchTestCase):
    """Run the shared Tacotron2 encoder tests with float32 tensors on CUDA."""
    dtype = torch.float32
    device = torch.device("cuda")
@skipIfNoCuda
class TestTacotron2DecoderFloat32CUDA(Tacotron2DecoderTests, PytorchTestCase):
    """Run the shared Tacotron2 decoder tests with float32 tensors on CUDA."""
    dtype = torch.float32
    device = torch.device("cuda")
@skipIfNoCuda
class TestTacotron2Float32CUDA(Tacotron2Tests, PytorchTestCase):
    """Run the shared full-model Tacotron2 tests with float32 tensors on CUDA."""
    dtype = torch.float32
    device = torch.device("cuda")
test/torchaudio_unittest/models/tacotron2/model_test_impl.py
0 → 100644
View file @
9dcc7a15
from
typing
import
Tuple
import
torch
from
torch
import
Tensor
from
torchaudio.models
import
Tacotron2
from
torchaudio.models.tacotron2
import
_Encoder
,
_Decoder
from
torchaudio_unittest.common_utils
import
TestBaseMixin
,
torch_script
class Tacotron2InferenceWrapper(torch.nn.Module):
    """Expose ``Tacotron2.infer`` via ``forward`` so it can be torchscripted."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, text: Tensor, text_lengths: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        # Delegate straight to the wrapped model's inference path.
        outputs = self.model.infer(text, text_lengths)
        return outputs
class Tacotron2DecoderInferenceWrapper(torch.nn.Module):
    """Expose the decoder's ``infer`` via ``forward`` so it can be torchscripted."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, memory: Tensor, memory_lengths: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        # Delegate straight to the wrapped decoder's inference path.
        outputs = self.model.infer(memory, memory_lengths)
        return outputs
class TorchscriptConsistencyMixin(TestBaseMixin):
    r"""Mixin to provide easy access assert torchscript consistency"""

    def _assert_torchscript_consistency(self, model, tensors):
        """Script ``model`` and check eager vs. scripted outputs agree on ``tensors``.

        The RNG is reseeded identically before each run so any stochastic ops
        produce comparable outputs.
        """
        scripted = torch_script(model)

        torch.random.manual_seed(40)
        eager_out = model(*tensors)

        torch.random.manual_seed(40)
        scripted_out = scripted(*tensors)

        self.assertEqual(scripted_out, eager_out)
class Tacotron2EncoderTests(TorchscriptConsistencyMixin):
    """Shape and TorchScript-consistency tests for the Tacotron2 _Encoder."""

    def _make_model_and_inputs(self, n_batch, n_seq, encoder_embedding_dim):
        # Build an eval-mode _Encoder plus matching random inputs on self.device.
        model = _Encoder(
            encoder_embedding_dim=encoder_embedding_dim,
            encoder_n_convolution=3,
            encoder_kernel_size=5,
        ).to(self.device).eval()
        x = torch.rand(n_batch, encoder_embedding_dim, n_seq,
                       device=self.device, dtype=self.dtype)
        input_lengths = torch.ones(n_batch, device=self.device, dtype=torch.int32) * n_seq
        return model, x, input_lengths

    def test_tacotron2_torchscript_consistency(self):
        r"""Validate the torchscript consistency of a Encoder."""
        model, x, input_lengths = self._make_model_and_inputs(16, 64, 512)
        self._assert_torchscript_consistency(model, (x, input_lengths))

    def test_encoder_output_shape(self):
        r"""Feed tensors with specific shape to Tacotron2 Decoder and validate
        that it outputs with a tensor with expected shape.
        """
        n_batch, n_seq, encoder_embedding_dim = 16, 64, 512
        model, x, input_lengths = self._make_model_and_inputs(n_batch, n_seq, encoder_embedding_dim)
        out = model(x, input_lengths)
        assert out.size() == (n_batch, n_seq, encoder_embedding_dim)
def _get_decoder_model(n_mels=80, encoder_embedding_dim=512,
                       decoder_max_step=2000, gate_threshold=0.5):
    """Build a Tacotron2 ``_Decoder`` with the test module's standard settings.

    Only the knobs the tests vary are exposed as parameters; all other
    hyper-parameters are fixed.
    """
    return _Decoder(
        n_mels=n_mels,
        n_frames_per_step=1,
        encoder_embedding_dim=encoder_embedding_dim,
        decoder_rnn_dim=1024,
        decoder_max_step=decoder_max_step,
        decoder_dropout=0.1,
        decoder_early_stopping=True,
        attention_rnn_dim=1024,
        attention_hidden_dim=128,
        attention_location_n_filter=32,
        attention_location_kernel_size=31,
        attention_dropout=0.1,
        prenet_dim=256,
        gate_threshold=gate_threshold,
    )
class Tacotron2DecoderTests(TorchscriptConsistencyMixin):
    """Shape and TorchScript-consistency tests for the Tacotron2 _Decoder."""

    def _rand(self, *shape):
        # Random tensor on this test's device/dtype.
        return torch.rand(*shape, dtype=self.dtype, device=self.device)

    def test_decoder_torchscript_consistency(self):
        r"""Validate the torchscript consistency of a Decoder."""
        n_batch, n_mels, n_seq = 16, 80, 200
        encoder_embedding_dim, n_time_steps = 256, 150

        model = _get_decoder_model(n_mels=n_mels, encoder_embedding_dim=encoder_embedding_dim)
        model = model.to(self.device).eval()

        memory = self._rand(n_batch, n_seq, encoder_embedding_dim)
        decoder_inputs = self._rand(n_batch, n_mels, n_time_steps)
        memory_lengths = torch.ones(n_batch, dtype=torch.int32, device=self.device)

        self._assert_torchscript_consistency(model, (memory, decoder_inputs, memory_lengths))

    def test_decoder_output_shape(self):
        r"""Feed tensors with specific shape to Tacotron2 Decoder and validate
        that it outputs with a tensor with expected shape.
        """
        n_batch, n_mels, n_seq = 16, 80, 200
        encoder_embedding_dim, n_time_steps = 256, 150

        model = _get_decoder_model(n_mels=n_mels, encoder_embedding_dim=encoder_embedding_dim)
        model = model.to(self.device).eval()

        memory = self._rand(n_batch, n_seq, encoder_embedding_dim)
        decoder_inputs = self._rand(n_batch, n_mels, n_time_steps)
        memory_lengths = torch.ones(n_batch, dtype=torch.int32, device=self.device)

        mel_specgram, gate_outputs, alignments = model(memory, decoder_inputs, memory_lengths)

        assert mel_specgram.size() == (n_batch, n_mels, n_time_steps)
        assert gate_outputs.size() == (n_batch, n_time_steps)
        assert alignments.size() == (n_batch, n_time_steps, n_seq)

    def test_decoder_inference_torchscript_consistency(self):
        r"""Validate the torchscript consistency of a Decoder."""
        n_batch, n_mels, n_seq, encoder_embedding_dim = 16, 80, 200, 256
        decoder_max_step = 300  # make inference more efficient
        gate_threshold = 0.505  # make inference more efficient

        model = _get_decoder_model(
            n_mels=n_mels,
            encoder_embedding_dim=encoder_embedding_dim,
            decoder_max_step=decoder_max_step,
            gate_threshold=gate_threshold,
        ).to(self.device).eval()

        memory = self._rand(n_batch, n_seq, encoder_embedding_dim)
        memory_lengths = torch.ones(n_batch, dtype=torch.int32, device=self.device)

        wrapped = Tacotron2DecoderInferenceWrapper(model)
        self._assert_torchscript_consistency(wrapped, (memory, memory_lengths))

    def test_decoder_inference_output_shape(self):
        r"""Validate the output shapes of the Decoder's infer method."""
        n_batch, n_mels, n_seq, encoder_embedding_dim = 16, 80, 200, 256
        decoder_max_step = 300  # make inference more efficient
        gate_threshold = 0.505  # if set to 0.5, the model will only run one step

        model = _get_decoder_model(
            n_mels=n_mels,
            encoder_embedding_dim=encoder_embedding_dim,
            decoder_max_step=decoder_max_step,
            gate_threshold=gate_threshold,
        ).to(self.device).eval()

        memory = self._rand(n_batch, n_seq, encoder_embedding_dim)
        memory_lengths = torch.ones(n_batch, dtype=torch.int32, device=self.device)

        mel_specgram, mel_specgram_lengths, gate_outputs, alignments = \
            model.infer(memory, memory_lengths)

        # The number of generated frames is data-dependent; only bounded by
        # decoder_max_step.
        n_steps = mel_specgram_lengths.max().item()

        assert len(mel_specgram.size()) == 3
        assert mel_specgram.size()[:-1] == (n_batch, n_mels,)
        assert mel_specgram.size()[2] == n_steps

        assert len(mel_specgram_lengths.size()) == 1
        assert mel_specgram_lengths.size()[0] == n_batch
        assert n_steps <= model.decoder_max_step

        assert len(gate_outputs.size()) == 2
        assert gate_outputs.size()[0] == n_batch
        assert gate_outputs.size()[1] == n_steps

        assert len(alignments.size()) == 2
        assert alignments.size()[0] == n_seq
        assert alignments.size()[1] == n_steps * n_batch
def _get_tacotron2_model(n_mels, decoder_max_step=2000, gate_threshold=0.5):
    """Build a full Tacotron2 model with the test module's standard settings.

    Only ``n_mels``, ``decoder_max_step`` and ``gate_threshold`` vary across
    tests; everything else is fixed.
    """
    return Tacotron2(
        mask_padding=False,
        n_mels=n_mels,
        n_symbol=148,
        n_frames_per_step=1,
        symbol_embedding_dim=512,
        encoder_embedding_dim=512,
        encoder_n_convolution=3,
        encoder_kernel_size=5,
        decoder_rnn_dim=1024,
        decoder_max_step=decoder_max_step,
        decoder_dropout=0.1,
        decoder_early_stopping=True,
        attention_rnn_dim=1024,
        attention_hidden_dim=128,
        attention_location_n_filter=32,
        attention_location_kernel_size=31,
        attention_dropout=0.1,
        prenet_dim=256,
        postnet_n_convolution=5,
        postnet_kernel_size=5,
        postnet_embedding_dim=512,
        gate_threshold=gate_threshold,
    )
class Tacotron2Tests(TorchscriptConsistencyMixin):
    """Shape, backward, and TorchScript tests for the full Tacotron2 model."""

    def _get_inputs(self, n_mels: int, n_batch: int, max_mel_specgram_length: int, max_text_length: int):
        """Build (text, text_lengths, mel_specgram, mel_specgram_lengths) batches."""
        device, dtype = self.device, self.dtype
        text = torch.randint(0, 148, (n_batch, max_text_length), dtype=torch.int32, device=device)
        text_lengths = max_text_length * torch.ones((n_batch,), dtype=torch.int32, device=device)
        mel_specgram = torch.rand(n_batch, n_mels, max_mel_specgram_length,
                                  dtype=dtype, device=device)
        mel_specgram_lengths = max_mel_specgram_length * torch.ones(
            (n_batch,), dtype=torch.int32, device=device)
        return text, text_lengths, mel_specgram, mel_specgram_lengths

    def test_tacotron2_torchscript_consistency(self):
        r"""Validate the torchscript consistency of a Tacotron2."""
        n_batch, n_mels = 16, 80
        max_mel_specgram_length, max_text_length = 300, 100

        model = _get_tacotron2_model(n_mels).to(self.device).eval()
        inputs = self._get_inputs(n_mels, n_batch, max_mel_specgram_length, max_text_length)

        self._assert_torchscript_consistency(model, inputs)

    def test_tacotron2_output_shape(self):
        r"""Feed tensors with specific shape to Tacotron2 and validate
        that it outputs with a tensor with expected shape.
        """
        n_batch, n_mels = 16, 80
        max_mel_specgram_length, max_text_length = 300, 100

        model = _get_tacotron2_model(n_mels).to(self.device).eval()
        inputs = self._get_inputs(n_mels, n_batch, max_mel_specgram_length, max_text_length)
        mel_out, mel_out_postnet, gate_outputs, alignments = model(*inputs)

        assert mel_out.size() == (n_batch, n_mels, max_mel_specgram_length)
        assert mel_out_postnet.size() == (n_batch, n_mels, max_mel_specgram_length)
        assert gate_outputs.size() == (n_batch, max_mel_specgram_length)
        assert alignments.size() == (n_batch, max_mel_specgram_length, max_text_length)

    def test_tacotron2_backward(self):
        r"""Make sure calling the backward function on Tacotron2's outputs does
        not error out. Following:
        https://github.com/pytorch/vision/blob/23b8760374a5aaed53c6e5fc83a7e83dbe3b85df/test/test_models.py#L255
        """
        n_batch, n_mels = 16, 80
        max_mel_specgram_length, max_text_length = 300, 100

        # no .eval() here — gradients flow through the training-mode graph
        model = _get_tacotron2_model(n_mels).to(self.device)
        inputs = self._get_inputs(n_mels, n_batch, max_mel_specgram_length, max_text_length)
        mel_out, mel_out_postnet, gate_outputs, _ = model(*inputs)

        mel_out.sum().backward(retain_graph=True)
        mel_out_postnet.sum().backward(retain_graph=True)
        gate_outputs.sum().backward()

    def _get_inference_inputs(self, n_batch: int, max_text_length: int):
        """Build (text, text_lengths) batches for the inference path."""
        text = torch.randint(0, 148, (n_batch, max_text_length),
                             dtype=torch.int32, device=self.device)
        text_lengths = max_text_length * torch.ones(
            (n_batch,), dtype=torch.int32, device=self.device)
        return text, text_lengths

    def test_tacotron2_inference_torchscript_consistency(self):
        r"""Validate the torchscript consistency of Tacotron2 inference function."""
        n_batch, n_mels, max_text_length = 16, 40, 100
        decoder_max_step = 200  # make inference more efficient
        gate_threshold = 0.51  # if set to 0.5, the model will only run one step

        model = _get_tacotron2_model(
            n_mels, decoder_max_step=decoder_max_step, gate_threshold=gate_threshold,
        ).to(self.device).eval()
        inputs = self._get_inference_inputs(n_batch, max_text_length)

        self._assert_torchscript_consistency(Tacotron2InferenceWrapper(model), inputs)

    def test_tacotron2_inference_output_shape(self):
        r"""Feed tensors with specific shape to Tacotron2 inference function and validate
        that it outputs with a tensor with expected shape.
        """
        n_batch, n_mels, max_text_length = 16, 40, 100
        decoder_max_step = 200  # make inference more efficient
        gate_threshold = 0.51  # if set to 0.5, the model will only run one step

        model = _get_tacotron2_model(
            n_mels, decoder_max_step=decoder_max_step, gate_threshold=gate_threshold,
        ).to(self.device).eval()
        inputs = self._get_inference_inputs(n_batch, max_text_length)
        mel_out, mel_specgram_lengths, alignments = model.infer(*inputs)

        # There is no guarantee on exactly what max_mel_specgram_length should be
        # We only know that it should be smaller than model.decoder.decoder_max_step
        n_steps = mel_specgram_lengths.max().item()

        assert len(mel_out.size()) == 3
        assert mel_out.size()[:2] == (n_batch, n_mels,)
        assert mel_out.size()[2] == n_steps

        assert len(mel_specgram_lengths.size()) == 1
        assert mel_specgram_lengths.size()[0] == n_batch
        assert n_steps <= model.decoder.decoder_max_step

        assert len(alignments.size()) == 3
        assert alignments.size()[0] == n_batch
        assert alignments.size()[1] == n_steps
        assert alignments.size()[2] == max_text_length
test/torchaudio_unittest/models/wav2vec2/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/models/wav2vec2/fairseq_integration_test.py
0 → 100644
View file @
9dcc7a15
import
json
import
torch
from
torchaudio.models.wav2vec2
import
(
wav2vec2_base
,
wav2vec2_large
,
wav2vec2_large_lv60k
,
hubert_base
,
hubert_large
,
hubert_xlarge
,
)
from
torchaudio.models.wav2vec2.utils
import
(
import_fairseq_model
,
)
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
get_asset_path
,
skipIfNoModule
,
TorchaudioTestCase
,
)
def _load_config(*paths):
    """Load the JSON model config stored at assets/wav2vec2/fairseq/<paths>.json."""
    config_path = f'{get_asset_path("wav2vec2", "fairseq", *paths)}.json'
    with open(config_path, 'r') as fp:
        return json.load(fp)
def
_name_func
(
testcase_func
,
i
,
param
):
return
f
'
{
testcase_func
.
__name__
}
_
{
i
}
_
{
param
[
0
][
1
].
__name__
}
'
# Pretraining models
# Each constant is the parsed JSON config of the corresponding fairseq checkpoint.
WAV2VEC2_BASE = _load_config('wav2vec_small')
WAV2VEC2_LARGE = _load_config('libri960_big')
WAV2VEC2_LARGE_LV60K = _load_config('wav2vec_vox_new')
WAV2VEC2_XLSR_53_56K = _load_config('xlsr_53_56k')
HUBERT_BASE = _load_config('hubert_base_ls960')
HUBERT_LARGE_LL60K = _load_config('hubert_large_ll60k')
HUBERT_XLARGE_LL60K = _load_config('hubert_xtralarge_ll60k')
# Finetuning models
WAV2VEC2_BASE_960H = _load_config('wav2vec_small_960h')
WAV2VEC2_LARGE_960H = _load_config('wav2vec_large_960h')
WAV2VEC2_LARGE_LV60K_960H = _load_config('wav2vec_large_lv60k_960h')
WAV2VEC2_LARGE_LV60K_SELF_960H = _load_config('wav2vec_large_lv60k_self_960h')
HUBERT_LARGE = _load_config('hubert_large_ll60k_finetune_ls960')
HUBERT_XLARGE = _load_config('hubert_xtralarge_ll60k_finetune_ls960')
# Config and corresponding factory functions
# NOTE(review): XLSR-53 is paired with the wav2vec2_large_lv60k factory —
# presumably because it shares that architecture; confirm against the importer.
WAV2VEC2_PRETRAINING_CONFIGS = parameterized.expand([
    (WAV2VEC2_BASE, wav2vec2_base),
    (WAV2VEC2_LARGE, wav2vec2_large),
    (WAV2VEC2_LARGE_LV60K, wav2vec2_large_lv60k),
    (WAV2VEC2_XLSR_53_56K, wav2vec2_large_lv60k),
], name_func=_name_func)
HUBERT_PRETRAINING_CONFIGS = parameterized.expand([
    (HUBERT_BASE, hubert_base),
    (HUBERT_LARGE_LL60K, hubert_large),
    (HUBERT_XLARGE_LL60K, hubert_xlarge),
], name_func=_name_func)
ALL_PRETRAINING_CONFIGS = parameterized.expand([
    (WAV2VEC2_BASE, wav2vec2_base),
    (WAV2VEC2_LARGE, wav2vec2_large),
    (WAV2VEC2_LARGE_LV60K, wav2vec2_large_lv60k),
    (WAV2VEC2_XLSR_53_56K, wav2vec2_large_lv60k),
    (HUBERT_BASE, hubert_base),
    (HUBERT_LARGE_LL60K, hubert_large),
    (HUBERT_XLARGE_LL60K, hubert_xlarge),
], name_func=_name_func)
FINETUNING_CONFIGS = parameterized.expand([
    (WAV2VEC2_BASE_960H, wav2vec2_base),
    (WAV2VEC2_LARGE_960H, wav2vec2_large),
    (WAV2VEC2_LARGE_LV60K_960H, wav2vec2_large_lv60k),
    (WAV2VEC2_LARGE_LV60K_SELF_960H, wav2vec2_large_lv60k),
    (HUBERT_LARGE, hubert_large),
    (HUBERT_XLARGE, hubert_xlarge),
], name_func=_name_func)
@skipIfNoModule('fairseq')
class TestFairseqIntegration(TorchaudioTestCase):
    """Test the process of importing the models from fairseq.

    Test methods in this test suite check the following things
    1. Models loaded with fairseq can be imported.
    2. The same model can be recreated without fairseq.
    """
    def _get_model(self, config, num_out=None):
        """Instantiate the fairseq model selected by ``config['_name']``.

        ``num_out`` is only consumed by the CTC (finetuned) variants.
        Imports are deferred to call time so that loading this module
        does not require fairseq to be installed.
        """
        import copy
        from omegaconf import OmegaConf
        from fairseq.models.wav2vec.wav2vec2 import (
            Wav2Vec2Config,
            Wav2Vec2Model,
        )
        from fairseq.models.wav2vec.wav2vec2_asr import (
            Wav2VecEncoder,
            Wav2Vec2CtcConfig,
        )
        from fairseq.models.hubert.hubert_asr import (
            HubertCtcConfig,
            HubertEncoder,
        )
        from fairseq.models.hubert.hubert import (
            HubertModel,
            HubertConfig,
        )
        from fairseq.tasks.hubert_pretraining import HubertPretrainingConfig
        if config['_name'] == 'wav2vec_ctc':
            # deepcopy so the shared module-level config dict is not mutated
            config = copy.deepcopy(config)
            config['w2v_args'] = OmegaConf.create(config['w2v_args'])
            return Wav2VecEncoder(Wav2Vec2CtcConfig(**config), num_out)
        if config['_name'] == 'wav2vec2':
            return Wav2Vec2Model(Wav2Vec2Config(**config))
        if config['_name'] == 'hubert_ctc':
            config = copy.deepcopy(config)
            config['w2v_args'] = OmegaConf.create(config['w2v_args'])
            ctc_cfg = HubertCtcConfig(**config)
            return HubertEncoder(ctc_cfg, tgt_dict=range(num_out))
        if config['_name'] == 'hubert':
            # One dummy label dictionary per entry in `num_classes`.
            dicts = [list(range(i)) for i in config['num_classes']]
            return HubertModel(
                HubertConfig(**config['model']),
                HubertPretrainingConfig(**config['task']),
                dicts,
            )
        raise ValueError(f'Unexpected configuration: {config["_name"]}')

    @WAV2VEC2_PRETRAINING_CONFIGS
    def test_import_wave2vec2_pretraining_model(self, config, _):
        """Wav2vec2 pretraining models from fairseq can be imported and yields the same results"""
        batch_size, num_frames = 3, 1024
        torch.manual_seed(0)
        original = self._get_model(config).eval()
        imported = import_fairseq_model(original).eval()
        x = torch.randn(batch_size, num_frames)
        hyp, _ = imported.extract_features(x)
        refs = original.extract_features(x, padding_mask=torch.zeros_like(x), layer=-1)
        for i, (ref, _) in enumerate(refs['layer_results']):
            # transpose(0, 1): fairseq layer results appear to be time-first;
            # the imported model is batch-first — TODO confirm against fairseq docs.
            self.assertEqual(hyp[i], ref.transpose(0, 1))

    @HUBERT_PRETRAINING_CONFIGS
    def test_import_hubert_pretraining_model(self, config, factory_func):
        """HuBERT pretraining models from fairseq can be imported and yields the same results"""
        batch_size, num_frames = 3, 1024
        torch.manual_seed(0)
        original = self._get_model(config).eval()
        imported = import_fairseq_model(original).eval()
        x = torch.randn(batch_size, num_frames)
        mask = torch.zeros_like(x)
        hyp, _ = imported.extract_features(x)
        # check the last layer
        ref, _ = original.extract_features(x, padding_mask=mask, output_layer=len(original.encoder.layers))
        # Relaxed tolerance for the xlarge variant only.
        atol = 3.0e-05 if factory_func is hubert_xlarge else 1.0e-5
        self.assertEqual(hyp[-1], ref, atol=atol, rtol=1.3e-6)
        # check the first layer
        ref, _ = original.extract_features(x, padding_mask=mask, output_layer=1)
        self.assertEqual(hyp[0], ref)

    @ALL_PRETRAINING_CONFIGS
    def test_recreate_pretraining_model(self, config, factory_func):
        """Imported pretraining models can be recreated via a factory function without fairseq."""
        batch_size, num_frames = 3, 1024
        original = self._get_model(config).eval()
        imported = import_fairseq_model(original).eval()
        # Recreate with the torchaudio factory and transfer the weights.
        reloaded = factory_func()
        reloaded.load_state_dict(imported.state_dict())
        reloaded.eval()
        x = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        # Without mask
        ref, _ = imported(x)
        hyp, _ = reloaded(x)
        self.assertEqual(ref, hyp)
        # With mask
        ref, ref_lengths = imported(x, lengths)
        hyp, hyp_lengths = reloaded(x, lengths)
        self.assertEqual(ref, hyp)
        self.assertEqual(ref_lengths, hyp_lengths)

    @FINETUNING_CONFIGS
    def test_import_finetuning_model(self, config, _):
        """Finetuned wav2vec2 models from fairseq can be imported and yields the same results"""
        num_out = 28
        batch_size, num_frames = 3, 1024
        original = self._get_model(config, num_out).eval()
        imported = import_fairseq_model(original).eval()
        # Without mask
        x = torch.randn(batch_size, num_frames)
        ref = original(x, torch.zeros_like(x))['encoder_out'].transpose(0, 1)
        hyp, _ = imported(x)
        self.assertEqual(ref, hyp)
        # With mask
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        # padding mask: True at positions at/past each sequence's length
        mask = torch.arange(num_frames).expand(batch_size, num_frames) >= lengths[:, None]
        ref = original(x, mask)['encoder_out'].transpose(0, 1)
        hyp, output_lengths = imported(x, lengths)
        # Compare only the valid (unpadded) prefix of each sequence.
        for i, l in enumerate(output_lengths):
            self.assertEqual(ref[i, :l, ...], hyp[i, :l, ...])

    @FINETUNING_CONFIGS
    def test_recreate_finetuning_model(self, config, factory_func):
        """Imported finetuning models can be recreated via a factory function without fairseq."""
        num_out = 28
        batch_size, num_frames = 3, 1024
        original = self._get_model(config, num_out).eval()
        imported = import_fairseq_model(original).eval()
        # Recreate with a matching CTC output dimension and transfer weights.
        reloaded = factory_func(aux_num_out=num_out)
        reloaded.load_state_dict(imported.state_dict())
        reloaded.eval()
        # Without mask
        torch.manual_seed(0)
        x = torch.randn(batch_size, num_frames)
        ref, _ = imported(x)
        hyp, _ = reloaded(x)
        self.assertEqual(ref, hyp)
        # With mask
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        ref, ref_lengths = imported(x, lengths)
        hyp, hyp_lengths = reloaded(x, lengths)
        self.assertEqual(ref, hyp)
        self.assertEqual(ref_lengths, hyp_lengths)
test/torchaudio_unittest/models/wav2vec2/huggingface_intergration_test.py
0 → 100644
View file @
9dcc7a15
import
json
import
torch
from
torchaudio.models.wav2vec2
import
(
wav2vec2_base
,
wav2vec2_large
,
wav2vec2_large_lv60k
,
)
from
torchaudio.models.wav2vec2.utils
import
import_huggingface_model
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
get_asset_path
,
skipIfNoModule
,
TorchaudioTestCase
,
)
def _load_config(*paths):
    """Load a Hugging Face wav2vec2 configuration stored as a JSON asset."""
    asset = get_asset_path("wav2vec2", "huggingface", *paths)
    with open(f'{asset}.json', 'r') as config_file:
        return json.load(config_file)
def
_name_func
(
testcase_func
,
i
,
param
):
return
f
"
{
testcase_func
.
__name__
}
_
{
i
}
_
{
param
[
0
][
1
].
__name__
}
"
# Pretrained
HF_BASE = _load_config('facebook', 'wav2vec2-base')
HF_LARGE = _load_config('facebook', 'wav2vec2-large')
HF_LARGE_LV60 = _load_config('facebook', 'wav2vec2-large-lv60')
HF_LARGE_XLSR_53 = _load_config('facebook', 'wav2vec2-large-xlsr-53')
HF_BASE_10K_VOXPOPULI = _load_config('facebook', 'wav2vec2-base-10k-voxpopuli')
# Finetuned
HF_BASE_960H = _load_config('facebook', 'wav2vec2-base-960h')
HF_LARGE_960H = _load_config('facebook', 'wav2vec2-large-960h')
HF_LARGE_LV60_960H = _load_config('facebook', 'wav2vec2-large-960h-lv60')
HF_LARGE_LV60_SELF_960H = _load_config('facebook', 'wav2vec2-large-960h-lv60-self')
HF_LARGE_XLSR_DE = _load_config('facebook', 'wav2vec2-large-xlsr-53-german')
# Config and corresponding factory functions
# Each case pairs an HF config dict with the torchaudio factory that
# builds the equivalent architecture.
PRETRAIN_CONFIGS = parameterized.expand([
    (HF_BASE, wav2vec2_base),
    (HF_LARGE, wav2vec2_large),
    (HF_LARGE_LV60, wav2vec2_large_lv60k),
    (HF_LARGE_XLSR_53, wav2vec2_large_lv60k),
    (HF_BASE_10K_VOXPOPULI, wav2vec2_base),
], name_func=_name_func)
FINETUNE_CONFIGS = parameterized.expand([
    (HF_BASE_960H, wav2vec2_base),
    (HF_LARGE_960H, wav2vec2_large),
    (HF_LARGE_LV60_960H, wav2vec2_large_lv60k),
    (HF_LARGE_LV60_SELF_960H, wav2vec2_large_lv60k),
    (HF_LARGE_XLSR_DE, wav2vec2_large_lv60k),
], name_func=_name_func)
@skipIfNoModule('transformers')
class TestHFIntegration(TorchaudioTestCase):
    """Test the process of importing the models from Hugging Face Transformers

    Test methods in this test suite check the following things
    1. Models loaded with Hugging Face Transformers can be imported.
    2. The same model can be recreated without Hugging Face Transformers.
    """
    def _get_model(self, config):
        """Instantiate the HF model selected by ``config['architectures']``."""
        # Helper function to avoid importing transformers on module scope.
        # Normally, we use `is_module_available` helper function to check if
        # the library is available, and import it on module scope if available.
        # However, somehow, once "transformers" is imported, `is_module_available`
        # starts to fail. Therefore, we defer importing "transformers" until
        # the actual tests are started.
        from transformers.models.wav2vec2 import (
            Wav2Vec2Config,
            Wav2Vec2Model,
            Wav2Vec2ForCTC,
        )
        if config['architectures'] == ['Wav2Vec2Model']:
            return Wav2Vec2Model(Wav2Vec2Config(**config))
        if config['architectures'] == ['Wav2Vec2ForCTC']:
            return Wav2Vec2ForCTC(Wav2Vec2Config(**config))
        raise ValueError(f'Unexpected arch: {config["architectures"]}')

    def _test_import_pretrain(self, original, imported, config):
        """Compare imported vs original submodule by submodule, then end-to-end."""
        torch.manual_seed(0)
        # FeatureExtractor
        x = torch.randn(3, 1024)
        ref = original.feature_extractor(x).transpose(1, 2)
        hyp, _ = imported.feature_extractor(x, None)
        self.assertEqual(ref, hyp)
        # Feature projection
        x = torch.randn(3, 10, config['conv_dim'][-1])
        ref = original.feature_projection(x)[0]
        hyp = imported.encoder.feature_projection(x)
        self.assertEqual(ref, hyp)
        # Convolutional Positional Encoder
        x = torch.randn(3, 256, config['hidden_size'])
        ref = original.encoder.pos_conv_embed(x)
        hyp = imported.encoder.transformer.pos_conv_embed(x)
        self.assertEqual(ref, hyp)
        # Encoder Transformer Layer
        for original_, imported_ in zip(original.encoder.layers, imported.encoder.transformer.layers):
            b, l, e = 16, 3, config["hidden_size"]
            x = torch.randn(b, l, e)
            mask = torch.randn(b, 1, l, l)
            # HF layers return a tuple; unpack the single hidden-state element.
            ref, = original_(x, attention_mask=mask, output_attentions=False)
            hyp = imported_(x, mask)
            self.assertEqual(ref, hyp)
        # The whole Encoder Transformer
        b, l, e = 16, 3, config["hidden_size"]
        x = torch.randn(b, l, e)
        ref = original.encoder(x).last_hidden_state
        hyp = imported.encoder.transformer(x)
        self.assertEqual(ref, hyp)

    def _test_import_finetune(self, original, imported, config):
        """Compare the CTC head and full-model outputs of imported vs original."""
        # Aux
        x = torch.randn(3, 10, config["hidden_size"])
        ref = original.lm_head(x)
        hyp = imported.aux(x)
        self.assertEqual(ref, hyp)
        # The whole model without mask
        x = torch.randn(3, 1024)
        ref = original(x).logits
        hyp, _ = imported(x)
        self.assertEqual(ref, hyp)
        # The whole model without mask
        # NOTE(review): this repeats the check above with the sizes spelled out.
        batch_size, num_frames = 3, 1024
        x = torch.randn(batch_size, num_frames)
        ref = original(x).logits
        hyp, _ = imported(x)
        self.assertEqual(ref, hyp)
        # The whole model with mask
        batch_size, num_frames = 3, 1024
        x = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        # attention mask: True marks valid positions (before each length)
        mask = torch.arange(num_frames).expand(batch_size, num_frames) < lengths[:, None]
        ref = original(x, attention_mask=mask).logits
        hyp, output_lengths = imported(x, lengths)
        # Compare only the valid (unpadded) prefix of each sequence.
        for i, l in enumerate(output_lengths):
            self.assertEqual(ref[i, :l, ...], hyp[i, :l, ...])

    @PRETRAIN_CONFIGS
    def test_import_pretrain(self, config, _):
        """wav2vec2 models from HF transformers can be imported and yields the same results"""
        original = self._get_model(config).eval()
        imported = import_huggingface_model(original).eval()
        self._test_import_pretrain(original, imported, config)

    @FINETUNE_CONFIGS
    def test_import_finetune(self, config, _):
        """wav2vec2 models from HF transformers can be imported and yields the same results"""
        original = self._get_model(config).eval()
        imported = import_huggingface_model(original).eval()
        # The CTC model wraps a pretrain model in `.wav2vec2`; validate that first.
        self._test_import_pretrain(original.wav2vec2, imported, config)
        self._test_import_finetune(original, imported, config)

    def _test_recreate(self, imported, reloaded, config):
        """Check that a factory-recreated model matches the imported one."""
        torch.manual_seed(0)
        # FeatureExtractor
        x = torch.randn(3, 1024)
        ref, _ = imported.feature_extractor(x, None)
        hyp, _ = reloaded.feature_extractor(x, None)
        self.assertEqual(ref, hyp)
        # Feature projection
        x = torch.randn(3, 10, config['conv_dim'][-1])
        ref = imported.encoder.feature_projection(x)
        hyp = reloaded.encoder.feature_projection(x)
        self.assertEqual(ref, hyp)
        # Convolutional Positional Encoder
        x = torch.randn(3, 256, config['hidden_size'])
        ref = imported.encoder.transformer.pos_conv_embed(x)
        hyp = reloaded.encoder.transformer.pos_conv_embed(x)
        self.assertEqual(ref, hyp)
        # Encoder Transformer Layer
        for imported_, reloaded_ in zip(imported.encoder.transformer.layers, reloaded.encoder.transformer.layers):
            b, l, e = 16, 3, config["hidden_size"]
            x = torch.randn(b, l, e)
            mask = torch.randn(b, 1, l, l)
            ref = imported_(x, mask)
            hyp = reloaded_(x, mask)
            self.assertEqual(ref, hyp)
        # The whole Encoder Transformer
        # TODO: Add mask pattern. Expected mask shapes and values are different.
        b, l, e = 16, 3, config["hidden_size"]
        x = torch.randn(b, l, e)
        # NOTE(review): `mask` is built but unused below — see the TODO above.
        mask = torch.randn(b, 1, l, l)
        ref = imported.encoder.transformer(x)
        hyp = reloaded.encoder.transformer(x)
        self.assertEqual(ref, hyp)
        # Aux
        if imported.aux is not None:
            x = torch.randn(3, 10, config["hidden_size"])
            ref = imported.aux(x)
            hyp = reloaded.aux(x)
            self.assertEqual(ref, hyp)
        # The whole model
        x = torch.randn(3, 1024)
        ref, _ = imported(x)
        hyp, _ = reloaded(x)
        self.assertEqual(ref, hyp)

    @PRETRAIN_CONFIGS
    def test_recreate_pretrain(self, config, factory_func):
        """Imported models can be recreated via a factory function without Hugging Face transformers."""
        imported = import_huggingface_model(self._get_model(config)).eval()
        reloaded = factory_func()
        reloaded.load_state_dict(imported.state_dict())
        reloaded.eval()
        self._test_recreate(imported, reloaded, config)

    @FINETUNE_CONFIGS
    def test_recreate_finetune(self, config, factory_func):
        """Imported models can be recreated via a factory function without Hugging Face transformers."""
        imported = import_huggingface_model(self._get_model(config)).eval()
        # Size the recreated CTC head from the imported model's aux layer.
        reloaded = factory_func(aux_num_out=imported.aux.out_features)
        reloaded.load_state_dict(imported.state_dict())
        reloaded.eval()
        self._test_recreate(imported, reloaded, config)
test/torchaudio_unittest/models/wav2vec2/model_test.py
0 → 100644
View file @
9dcc7a15
import
os
import
torch
import
torch.nn.functional
as
F
from
torchaudio.models.wav2vec2
import
(
wav2vec2_base
,
wav2vec2_large
,
wav2vec2_large_lv60k
,
hubert_base
,
hubert_large
,
hubert_xlarge
,
)
from
torchaudio_unittest.common_utils
import
(
TorchaudioTestCase
,
skipIfNoQengine
,
skipIfNoCuda
,
torch_script
,
)
from
parameterized
import
parameterized
def
_name_func
(
testcase_func
,
i
,
param
):
return
f
"
{
testcase_func
.
__name__
}
_
{
i
}
_
{
param
[
0
][
0
].
__name__
}
"
# Decorator expanding a test over every model factory function; one case
# per factory, named via _name_func.
factory_funcs = parameterized.expand([
    (wav2vec2_base, ),
    (wav2vec2_large, ),
    (wav2vec2_large_lv60k, ),
    (hubert_base, ),
    (hubert_large, ),
    (hubert_xlarge, ),
], name_func=_name_func)
class TestWav2Vec2Model(TorchaudioTestCase):
    """Smoke, consistency, TorchScript and quantization tests for Wav2Vec2Model."""
    def _smoke_test(self, model, device, dtype):
        """Run one forward pass on random input; only checks nothing raises."""
        model = model.to(device=device, dtype=dtype)
        model = model.eval()
        torch.manual_seed(0)
        batch_size, num_frames = 3, 1024
        waveforms = torch.randn(batch_size, num_frames, device=device, dtype=dtype)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ], device=device)
        model(waveforms, lengths)

    @parameterized.expand([(torch.float32, ), (torch.float64, )])
    def test_cpu_smoke_test(self, dtype):
        # Both the pretrain form and the finetune form (with aux head).
        model = wav2vec2_base()
        self._smoke_test(model, torch.device('cpu'), dtype)
        model = wav2vec2_base(aux_num_out=32)
        self._smoke_test(model, torch.device('cpu'), dtype)

    @parameterized.expand([(torch.float32, ), (torch.float64, )])
    @skipIfNoCuda
    def test_cuda_smoke_test(self, dtype):
        model = wav2vec2_base()
        self._smoke_test(model, torch.device('cuda'), dtype)
        model = wav2vec2_base(aux_num_out=32)
        self._smoke_test(model, torch.device('cuda'), dtype)

    def _feature_extractor_test(self, model):
        """Check `extract_features` output count/shape with and without num_layers."""
        batch_size, num_frames = 3, 1024
        model.eval()
        num_layers = len(model.encoder.transformer.layers)
        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        # Not providing num_layers returns all the intermediate features from
        # tranformer layers
        all_features, lengths_ = model.extract_features(waveforms, lengths, num_layers=None)
        assert len(all_features) == num_layers
        for features in all_features:
            assert features.ndim == 3
            assert features.shape[0] == batch_size
        assert lengths_.shape == torch.Size([batch_size])
        # Limiting the number of layers to `l`.
        for l in range(1, num_layers + 1):
            features, lengths_ = model.extract_features(waveforms, lengths, num_layers=l)
            assert len(features) == l
            # The truncated output must be a prefix of the full output.
            for i in range(l):
                self.assertEqual(all_features[i], features[i])
            assert lengths_.shape == torch.Size([batch_size])

    @factory_funcs
    def test_extract_feature(self, factory_func):
        """`extract_features` method does not fail"""
        self._feature_extractor_test(factory_func(aux_num_out=32))

    def _test_batch_consistency(self, model):
        """Batched inference should match per-sample inference within tolerance."""
        model.eval()
        batch_size, max_frames = 5, 5 * 1024
        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, max_frames)
        input_lengths = torch.tensor([i * 3200 for i in range(1, 6)])
        # Batch process with lengths
        batch_logits, output_lengths = model(waveforms, input_lengths)
        for i in range(batch_size):
            # Par-sample process without feeding length
            single_logit, _ = model(waveforms[i:i + 1, :input_lengths[i]], None)
            batch_logit = batch_logits[i:i + 1, :output_lengths[i]]
            # Convert to probability so that it's easier to interpretate the diff
            single_prob = F.softmax(single_logit, dim=2)
            batch_prob = F.softmax(batch_logit, dim=2)
            # We allow max atol=0.005 -> 0.5%
            self.assertEqual(single_prob, batch_prob, atol=0.005, rtol=0)

    @factory_funcs
    def test_pretrain_batch_consistency(self, factory_func):
        """Results from single process and batched process should be reasonably close
        """
        self._test_batch_consistency(factory_func())

    @factory_funcs
    def test_finetune_batch_consistency(self, factory_func):
        """Results from single process and batched process should be reasonably close
        """
        self._test_batch_consistency(factory_func(aux_num_out=32))

    def _test_zero_length(self, model):
        """Zero-length inputs must produce zero-length outputs without error."""
        model.eval()
        torch.manual_seed(0)
        batch_size = 3
        waveforms = torch.randn(batch_size, 1024)
        input_lengths = torch.zeros(batch_size)
        _, output_lengths = model(waveforms, input_lengths)
        self.assertEqual(torch.zeros_like(output_lengths), output_lengths)
        _, output_lengths = model.extract_features(waveforms, input_lengths)
        self.assertEqual(torch.zeros_like(output_lengths), output_lengths)

    @factory_funcs
    def test_pretrain_zero_length(self, factory_func):
        """Passing zero length should not fail"""
        self._test_zero_length(factory_func())

    @factory_funcs
    def test_finetune_zero_length(self, factory_func):
        """Passing zero length should not fail"""
        self._test_zero_length(factory_func(aux_num_out=32))

    def _test_torchscript(self, model):
        """Scripted model must reproduce the eager model's outputs exactly."""
        model.eval()
        batch_size, num_frames = 3, 1024
        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        ref_out, ref_len = model(waveforms, lengths)
        scripted = torch_script(model)
        hyp_out, hyp_len = scripted(waveforms, lengths)
        self.assertEqual(hyp_out, ref_out)
        self.assertEqual(hyp_len, ref_len)

    @factory_funcs
    def test_pretrain_torchscript(self, factory_func):
        """Wav2Vec2Model should be scriptable"""
        if factory_func is hubert_xlarge and os.name == 'nt' and os.environ.get('CI') == 'true':
            self.skipTest(
                'hubert_xlarge is known to fail on Windows CI. '
                'See https://github.com/pytorch/pytorch/issues/65776')
        self._test_torchscript(factory_func())

    @factory_funcs
    def test_finetune_torchscript(self, factory_func):
        """Wav2Vec2Model should be scriptable"""
        if factory_func is hubert_xlarge and os.name == 'nt' and os.environ.get('CI') == 'true':
            self.skipTest(
                'hubert_xlarge is known to fail on Windows CI. '
                'See https://github.com/pytorch/pytorch/issues/65776')
        self._test_torchscript(factory_func(aux_num_out=32))

    def _test_quantize_smoke_test(self, model):
        """Dynamic quantization must change the module and still run forward."""
        model.eval()
        batch_size, num_frames = 3, 1024
        # Remove the weight normalization forward hook
        model.encoder.transformer.pos_conv_embed.__prepare_scriptable__()
        quantized = torch.quantization.quantize_dynamic(
            model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
        # A lazy way to check that Modules are different
        assert str(quantized) != str(model), "Dynamic quantization did not modify the module."
        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        _, _ = quantized(waveforms, lengths)

    @factory_funcs
    @skipIfNoQengine
    def test_quantize(self, factory_func):
        """Wav2Vec2Model should support basic quantization"""
        self._test_quantize_smoke_test(factory_func(aux_num_out=32))

    def _test_quantize_torchscript(self, model):
        """Scripting a dynamically-quantized model must preserve its outputs."""
        model.eval()
        batch_size, num_frames = 3, 1024
        # Remove the weight normalization forward hook
        model.encoder.transformer.pos_conv_embed.__prepare_scriptable__()
        quantized = torch.quantization.quantize_dynamic(
            model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
        # A lazy way to check that Modules are different
        assert str(quantized) != str(model), "Dynamic quantization did not modify the module."
        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])
        ref_out, ref_len = quantized(waveforms, lengths)
        # Script
        scripted = torch_script(quantized)
        hyp_out, hyp_len = scripted(waveforms, lengths)
        self.assertEqual(hyp_out, ref_out)
        self.assertEqual(hyp_len, ref_len)

    @factory_funcs
    @skipIfNoQengine
    def test_quantize_torchscript(self, factory_func):
        """Quantized Wav2Vec2Model should be scriptable"""
        self._test_quantize_torchscript(factory_func(aux_num_out=32))
test/torchaudio_unittest/sox_effect/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/sox_effect/common.py
0 → 100644
View file @
9dcc7a15
import
json
from
parameterized
import
param
from
torchaudio_unittest.common_utils
import
get_asset_path
def name_func(func, _, params):
    """Build a readable test name from the function name and its parameters.

    When the first positional argument is a string, all of ``params.args``
    are joined; otherwise the first argument is assumed to be the sequence
    of values to join.
    """
    if isinstance(params.args[0], str):
        values = params.args
    else:
        values = params.args[0]
    suffix = "_".join(str(value) for value in values)
    return f'{func.__name__}_{suffix}'
def load_params(*paths):
    """Load parameterized-test cases from a JSON-lines asset file.

    Each line is one JSON object; ``<ASSET_DIR>`` placeholders inside the
    effect arguments are expanded to the actual asset directory.
    """
    loaded = []
    with open(get_asset_path(*paths), 'r') as file:
        for line in file:
            data = json.loads(line)
            for effect in data['effects']:
                for index, arg in enumerate(effect):
                    if arg.startswith("<ASSET_DIR>"):
                        effect[index] = arg.replace("<ASSET_DIR>", get_asset_path())
            loaded.append(param(data))
    return loaded
test/torchaudio_unittest/sox_effect/dataset_test.py
0 → 100644
View file @
9dcc7a15
import
sys
import
platform
from
unittest
import
skipIf
from
typing
import
List
,
Tuple
from
concurrent.futures
import
ProcessPoolExecutor
import
numpy
as
np
import
torch
import
torchaudio
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
PytorchTestCase
,
skipIfNoSox
,
get_whitenoise
,
save_wav
,
)
class RandomPerturbationFile(torch.utils.data.Dataset):
    """Given flist, apply random speed perturbation"""
    def __init__(self, flist: List[str], sample_rate: int):
        super().__init__()
        self.flist = flist
        self.sample_rate = sample_rate
        # Per-worker RNG; expected to be populated by the DataLoader's
        # worker_init_fn before __getitem__ is called.
        self.rng = None

    def __getitem__(self, index):
        # Random speed factor in [0.5, 2.0).
        speed = self.rng.uniform(0.5, 2.0)
        effects = [
            ['gain', '-n', '-10'],
            ['speed', f'{speed:.5f}'],  # duration of data is 0.5 ~ 2.0 seconds.
            ['rate', f'{self.sample_rate}'],
            ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
            ['trim', '0', '2'],  # get the first 2 seconds
        ]
        data, _ = torchaudio.sox_effects.apply_effects_file(self.flist[index], effects)
        return data

    def __len__(self):
        return len(self.flist)
class RandomPerturbationTensor(torch.utils.data.Dataset):
    """Apply speed perturbation to (synthetic) Tensor data"""
    def __init__(self, signals: List[Tuple[torch.Tensor, int]], sample_rate: int):
        super().__init__()
        self.signals = signals
        self.sample_rate = sample_rate
        # Per-worker RNG; expected to be populated by the DataLoader's
        # worker_init_fn before __getitem__ is called.
        self.rng = None

    def __getitem__(self, index):
        # Random speed factor in [0.5, 2.0).
        speed = self.rng.uniform(0.5, 2.0)
        effects = [
            ['gain', '-n', '-10'],
            ['speed', f'{speed:.5f}'],  # duration of data is 0.5 ~ 2.0 seconds.
            ['rate', f'{self.sample_rate}'],
            ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
            ['trim', '0', '2'],  # get the first 2 seconds
        ]
        tensor, sample_rate = self.signals[index]
        data, _ = torchaudio.sox_effects.apply_effects_tensor(tensor, sample_rate, effects)
        return data

    def __len__(self):
        return len(self.signals)
def init_random_seed(worker_id):
    """DataLoader worker_init_fn: seed each worker's dataset RNG by worker id."""
    worker_dataset = torch.utils.data.get_worker_info().dataset
    worker_dataset.rng = np.random.RandomState(worker_id)
@skipIfNoSox
@skipIf(
    platform.system() == 'Darwin' and
    sys.version_info.major == 3 and
    sys.version_info.minor in [6, 7],
    'This test is known to get stuck for macOS with Python < 3.8. '
    'See https://github.com/pytorch/pytorch/issues/46409')
class TestSoxEffectsDataset(TempDirMixin, PytorchTestCase):
    """Test `apply_effects_file` in multi-process dataloader setting"""
    def _generate_dataset(self, num_samples=128):
        """Write white-noise WAVs with mixed sample rates/dtypes; return the paths."""
        flist = []
        for i in range(num_samples):
            sample_rate = np.random.choice([8000, 16000, 44100])
            dtype = np.random.choice(['float32', 'int32', 'int16', 'uint8'])
            data = get_whitenoise(n_channels=2, sample_rate=sample_rate, duration=1, dtype=dtype)
            path = self.get_temp_path(f'{i:03d}_{dtype}_{sample_rate}.wav')
            save_wav(path, data, sample_rate)
            flist.append(path)
        return flist

    def test_apply_effects_file(self):
        sample_rate = 12000
        flist = self._generate_dataset()
        dataset = RandomPerturbationFile(flist, sample_rate)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=32, num_workers=16,
            worker_init_fn=init_random_seed,
        )
        # Every sample is trimmed/padded to 2 seconds, so batches are uniform.
        for batch in loader:
            assert batch.shape == (32, 2, 2 * sample_rate)

    def _generate_signals(self, num_samples=128):
        """Build in-memory white-noise tensors with mixed sample rates."""
        signals = []
        for _ in range(num_samples):
            sample_rate = np.random.choice([8000, 16000, 44100])
            data = get_whitenoise(
                n_channels=2, sample_rate=sample_rate, duration=1, dtype='float32')
            signals.append((data, sample_rate))
        return signals

    def test_apply_effects_tensor(self):
        sample_rate = 12000
        signals = self._generate_signals()
        dataset = RandomPerturbationTensor(signals, sample_rate)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=32, num_workers=16,
            worker_init_fn=init_random_seed,
        )
        for batch in loader:
            assert batch.shape == (32, 2, 2 * sample_rate)
def speed(path):
    """Load a file and apply a fixed speed + rate effect chain.

    Defined at module level so it can be submitted to a
    ProcessPoolExecutor (must be picklable). Returns the waveform only.
    """
    waveform, sample_rate = torchaudio.backend.sox_io_backend.load(path)
    effect_chain = [
        ['speed', '1.03756523535464655'],
        ['rate', f'{sample_rate}'],
    ]
    result = torchaudio.sox_effects.apply_effects_tensor(waveform, sample_rate, effect_chain)
    return result[0]
@skipIfNoSox
class TestProcessPoolExecutor(TempDirMixin, PytorchTestCase):
    """Run sox effects inside worker processes (regression for issue 1021)."""
    # Backend used by the test harness for audio I/O.
    backend = "sox_io"

    def setUp(self):
        # Prepare ten 1-second mono white-noise WAV files for the workers.
        sample_rate = 16000
        self.flist = []
        for i in range(10):
            path = self.get_temp_path(f'{i}.wav')
            data = get_whitenoise(n_channels=1, sample_rate=sample_rate, duration=1, dtype='float')
            save_wav(path, data, sample_rate)
            self.flist.append(path)

    def test_executor(self):
        """Test that apply_effects_tensor with speed + rate does not crush
        https://github.com/pytorch/audio/issues/1021
        """
        executor = ProcessPoolExecutor(1)
        futures = [executor.submit(speed, path) for path in self.flist]
        # .result() re-raises any exception from the worker process.
        for future in futures:
            future.result()
Prev
1
…
11
12
13
14
15
16
17
18
19
…
21
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment