Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
hehl2
Torchaudio
Commits
9dcc7a15
Commit
9dcc7a15
authored
Apr 25, 2022
by
flyingdown
Browse files
init v0.10.0
parent
db2b0b79
Changes
416
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1824 additions
and
0 deletions
+1824
-0
test/torchaudio_unittest/example/souce_sepration/metrics_test.py
...rchaudio_unittest/example/souce_sepration/metrics_test.py
+39
-0
test/torchaudio_unittest/example/souce_sepration/sdr_reference.py
...chaudio_unittest/example/souce_sepration/sdr_reference.py
+98
-0
test/torchaudio_unittest/example/souce_sepration/wsj0mix_test.py
...rchaudio_unittest/example/souce_sepration/wsj0mix_test.py
+111
-0
test/torchaudio_unittest/example/tacotron2/__init__.py
test/torchaudio_unittest/example/tacotron2/__init__.py
+0
-0
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_cpu_test.py
...dio_unittest/example/tacotron2/tacotron2_loss_cpu_test.py
+23
-0
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_gpu_test.py
...dio_unittest/example/tacotron2/tacotron2_loss_gpu_test.py
+26
-0
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_impl.py
...chaudio_unittest/example/tacotron2/tacotron2_loss_impl.py
+111
-0
test/torchaudio_unittest/example/tacotron2/test_text_preprocessing.py
...dio_unittest/example/tacotron2/test_text_preprocessing.py
+97
-0
test/torchaudio_unittest/functional/__init__.py
test/torchaudio_unittest/functional/__init__.py
+0
-0
test/torchaudio_unittest/functional/autograd_cpu_test.py
test/torchaudio_unittest/functional/autograd_cpu_test.py
+13
-0
test/torchaudio_unittest/functional/autograd_cuda_test.py
test/torchaudio_unittest/functional/autograd_cuda_test.py
+15
-0
test/torchaudio_unittest/functional/autograd_impl.py
test/torchaudio_unittest/functional/autograd_impl.py
+269
-0
test/torchaudio_unittest/functional/batch_consistency_test.py
.../torchaudio_unittest/functional/batch_consistency_test.py
+249
-0
test/torchaudio_unittest/functional/functional_cpu_test.py
test/torchaudio_unittest/functional/functional_cpu_test.py
+63
-0
test/torchaudio_unittest/functional/functional_cuda_test.py
test/torchaudio_unittest/functional/functional_cuda_test.py
+21
-0
test/torchaudio_unittest/functional/functional_impl.py
test/torchaudio_unittest/functional/functional_impl.py
+584
-0
test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py
...audio_unittest/functional/kaldi_compatibility_cpu_test.py
+19
-0
test/torchaudio_unittest/functional/kaldi_compatibility_cuda_test.py
...udio_unittest/functional/kaldi_compatibility_cuda_test.py
+16
-0
test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py
...udio_unittest/functional/kaldi_compatibility_test_impl.py
+60
-0
test/torchaudio_unittest/functional/librosa_compatibility_cpu_test.py
...dio_unittest/functional/librosa_compatibility_cpu_test.py
+10
-0
No files found.
Too many changes to show.
To preserve performance only
416 of 416+
files are displayed.
Plain diff
Email patch
test/torchaudio_unittest/example/souce_sepration/metrics_test.py
0 → 100644
View file @
9dcc7a15
from
itertools
import
product
import
torch
from
torch.testing._internal.common_utils
import
TestCase
from
parameterized
import
parameterized
from
.
import
sdr_reference
from
source_separation.utils
import
metrics
class TestSDR(TestCase):
    """Compare ``source_separation.utils.metrics`` against the reference SDR code."""

    @parameterized.expand([(1, ), (2, ), (32, )])
    def test_sdr(self, batch_size):
        """sdr produces the same result as the reference implementation"""
        num_frames = 256
        estimation = torch.rand(batch_size, num_frames)
        origin = torch.rand(batch_size, num_frames)

        sdr_ref = sdr_reference.calc_sdr_torch(estimation, origin)
        # metrics.sdr expects a source axis; insert it, then strip it again.
        sdr = metrics.sdr(estimation.unsqueeze(1), origin.unsqueeze(1)).squeeze(1)

        self.assertEqual(sdr, sdr_ref)

    @parameterized.expand(list(product([1, 2, 32], [2, 3, 4, 5])))
    def test_sdr_pit(self, batch_size, num_sources):
        """sdr_pit produces the same result as the reference implementation"""
        num_frames = 256
        estimation = torch.randn(batch_size, num_sources, num_frames)
        origin = torch.randn(batch_size, num_sources, num_frames)

        # Both implementations assume zero-mean signals along the sample axis.
        estimation -= estimation.mean(axis=2, keepdim=True)
        origin -= origin.mean(axis=2, keepdim=True)

        batch_sdr_ref = sdr_reference.batch_SDR_torch(estimation, origin)
        batch_sdr = metrics.sdr_pit(estimation, origin)
        self.assertEqual(batch_sdr, batch_sdr_ref)
test/torchaudio_unittest/example/souce_sepration/sdr_reference.py
0 → 100644
View file @
9dcc7a15
"""Reference Implementation of SDR and PIT SDR.
This module was taken from the following implementation
https://github.com/naplab/Conv-TasNet/blob/e66d82a8f956a69749ec8a4ae382217faa097c5c/utility/sdr.py
which was made available by Yi Luo under the following liscence,
Creative Commons Attribution-NonCommercial-ShareAlike 3.0 United States License.
The module was modified in the following manner;
- Remove the functions other than `calc_sdr_torch` and `batch_SDR_torch`,
- Remove the import statements required only for the removed functions.
- Add `# flake8: noqa` so as not to report any format issue on this module.
The implementation of the retained functions and their formats are kept as-is.
"""
# flake8: noqa
import
numpy
as
np
from
itertools
import
permutations
import
torch
def calc_sdr_torch(estimation, origin, mask=None):
    """
    batch-wise SDR caculation for one audio file on pytorch Variables.
    estimation: (batch, nsample)
    origin: (batch, nsample)
    mask: optional, (batch, nsample), binary
    """
    if mask is not None:
        # Score only the masked region of both signals.
        origin = origin * mask
        estimation = estimation * mask

    # Reference energy, floored to avoid division by zero.  (batch, 1)
    ref_energy = torch.pow(origin, 2).sum(1, keepdim=True) + 1e-8
    # Least-squares projection scale of the estimate onto the reference.  (batch, 1)
    proj_scale = torch.sum(origin * estimation, 1, keepdim=True) / ref_energy

    target = proj_scale * origin       # projected "true" component, (batch, nsample)
    residual = estimation - target     # everything not explained by origin, (batch, nsample)

    target_power = torch.pow(target, 2).sum(1)
    residual_power = torch.pow(residual, 2).sum(1)
    # SDR in dB, one value per batch element.  (batch,)
    return 10 * torch.log10(target_power) - 10 * torch.log10(residual_power)
def batch_SDR_torch(estimation, origin, mask=None):
    """
    batch-wise SDR caculation for multiple audio files.
    estimation: (batch, nsource, nsample)
    origin: (batch, nsource, nsample)
    mask: optional, (batch, nsample), binary
    """
    batch_size, nsource, nsample = estimation.size()
    batch_size_ori, nsource_ori, nsample_ori = origin.size()
    assert batch_size == batch_size_ori, "Estimation and original sources should have same shape."
    assert nsource == nsource_ori, "Estimation and original sources should have same shape."
    assert nsample == nsample_ori, "Estimation and original sources should have same shape."
    assert nsource < nsample, "Axis 1 should be the number of sources, and axis 2 should be the signal."

    # zero-mean both signals along the sample axis
    estimation = estimation - torch.mean(estimation, 2, keepdim=True).expand_as(estimation)
    origin = origin - torch.mean(origin, 2, keepdim=True).expand_as(estimation)

    # every candidate assignment of estimated sources to reference sources
    perm = list(set(permutations(np.arange(nsource))))

    # pairwise matrix: pair_sdr[b, i, j] scores estimate i against reference j
    pair_sdr = torch.zeros((batch_size, nsource, nsource)).type(estimation.type())
    for est_idx in range(nsource):
        for ref_idx in range(nsource):
            pair_sdr[:, est_idx, ref_idx] = calc_sdr_torch(
                estimation[:, est_idx], origin[:, ref_idx], mask)

    # total SDR under each permutation
    per_perm = []
    for permute in perm:
        picked = [pair_sdr[:, idx, permute[idx]].view(batch_size, -1)
                  for idx in range(len(permute))]
        per_perm.append(torch.sum(torch.cat(picked, 1), 1).view(batch_size, 1))
    per_perm = torch.cat(per_perm, 1)

    # best permutation wins; report the per-source average SDR
    best, _ = torch.max(per_perm, dim=1)
    return best / nsource
test/torchaudio_unittest/example/souce_sepration/wsj0mix_test.py
0 → 100644
View file @
9dcc7a15
import
os
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
TorchaudioTestCase
,
get_whitenoise
,
save_wav
,
normalize_wav
,
)
from
source_separation.utils.dataset
import
wsj0mix
# File names following the wsj0-mix naming scheme
# ("<utt1>_<snr1>_<utt2>_<snr2>.wav"); used to populate the mocked dataset
# directories below.
_FILENAMES = [
    "012c0207_1.9952_01cc0202_-1.9952.wav",
    "01co0302_1.63_014c020q_-1.63.wav",
    "01do0316_0.24011_205a0104_-0.24011.wav",
    "01lc020x_1.1301_027o030r_-1.1301.wav",
    "01mc0202_0.34056_205o0106_-0.34056.wav",
    "01nc020t_0.53821_018o030w_-0.53821.wav",
    "01po030f_2.2136_40ko031a_-2.2136.wav",
    "01ra010o_2.4098_403a010f_-2.4098.wav",
    "01xo030b_0.22377_016o031a_-0.22377.wav",
    "02ac020x_0.68566_01ec020b_-0.68566.wav",
    "20co010m_0.82801_019c0212_-0.82801.wav",
    "20da010u_1.2483_017c0211_-1.2483.wav",
    "20oo010d_1.0631_01ic020s_-1.0631.wav",
    "20sc0107_2.0222_20fo010h_-2.0222.wav",
    "20tc010f_0.051456_404a0110_-0.051456.wav",
    "407c0214_1.1712_02ca0113_-1.1712.wav",
    "40ao030w_2.4697_20vc010a_-2.4697.wav",
    "40pa0101_1.1087_40ea0107_-1.1087.wav",
]
def _mock_dataset(root_dir, num_speaker):
    """Write white-noise wavs mimicking a wsj0-mix layout under ``root_dir``.

    Creates a ``mix`` directory plus ``s1`` ... ``s<num_speaker>`` directories,
    saves one deterministic noise file per (directory, filename) pair, and
    returns the list of expected ``(sample_rate, mix, sources)`` tuples.
    """
    dirnames = ["mix"] + [f"s{i+1}" for i in range(num_speaker)]
    for dirname in dirnames:
        os.makedirs(os.path.join(root_dir, dirname), exist_ok=True)

    seed = 0
    sample_rate = 8000
    expected = []
    for filename in _FILENAMES:
        mix = None
        src = []
        for dirname in dirnames:
            # A distinct seed per file keeps every waveform unique but reproducible.
            waveform = get_whitenoise(
                sample_rate=8000, duration=1, n_channels=1, dtype="int16", seed=seed)
            seed += 1

            path = os.path.join(root_dir, dirname, filename)
            save_wav(path, waveform, sample_rate)
            waveform = normalize_wav(waveform)

            if dirname == "mix":
                mix = waveform
            else:
                src.append(waveform)
        expected.append((sample_rate, mix, src))
    return expected
class TestWSJ0Mix2(TempDirMixin, TorchaudioTestCase):
    """WSJ0Mix dataset test with two speakers."""
    backend = "default"
    root_dir = None
    expected = None

    @classmethod
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        cls.expected = _mock_dataset(cls.root_dir, 2)

    def test_wsj0mix(self):
        dataset = wsj0mix.WSJ0Mix(self.root_dir, num_speakers=2, sample_rate=8000)

        n_ite = 0
        for i, sample in enumerate(dataset):
            _, sample_mix, sample_src = sample
            _, expected_mix, expected_src = self.expected[i]
            self.assertEqual(sample_mix, expected_mix, atol=5e-5, rtol=1e-8)
            for k in range(2):
                self.assertEqual(sample_src[k], expected_src[k], atol=5e-5, rtol=1e-8)
            n_ite += 1
        # The dataset must yield every mocked file exactly once.
        assert n_ite == len(self.expected)
class TestWSJ0Mix3(TempDirMixin, TorchaudioTestCase):
    """WSJ0Mix dataset test with three speakers."""
    backend = "default"
    root_dir = None
    expected = None

    @classmethod
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        cls.expected = _mock_dataset(cls.root_dir, 3)

    def test_wsj0mix(self):
        dataset = wsj0mix.WSJ0Mix(self.root_dir, num_speakers=3, sample_rate=8000)

        n_ite = 0
        for i, sample in enumerate(dataset):
            _, sample_mix, sample_src = sample
            _, expected_mix, expected_src = self.expected[i]
            self.assertEqual(sample_mix, expected_mix, atol=5e-5, rtol=1e-8)
            for k in range(3):
                self.assertEqual(sample_src[k], expected_src[k], atol=5e-5, rtol=1e-8)
            n_ite += 1
        # The dataset must yield every mocked file exactly once.
        assert n_ite == len(self.expected)
test/torchaudio_unittest/example/tacotron2/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
.tacotron2_loss_impl
import
(
Tacotron2LossShapeTests
,
Tacotron2LossTorchscriptTests
,
Tacotron2LossGradcheckTests
,
)
from
torchaudio_unittest.common_utils
import
PytorchTestCase
# Concrete CPU instantiations of the Tacotron2 loss test mixins.  Each class
# only pins the dtype/device combination; the test bodies live in the mixins.
class TestTacotron2LossShapeFloat32CPU(Tacotron2LossShapeTests, PytorchTestCase):
    dtype = torch.float32
    device = torch.device("cpu")


class TestTacotron2TorchsciptFloat32CPU(Tacotron2LossTorchscriptTests, PytorchTestCase):
    dtype = torch.float32
    device = torch.device("cpu")


class TestTacotron2GradcheckFloat64CPU(Tacotron2LossGradcheckTests, PytorchTestCase):
    dtype = torch.float64  # gradcheck needs a higher numerical accuracy
    device = torch.device("cpu")
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_gpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
.tacotron2_loss_impl
import
(
Tacotron2LossShapeTests
,
Tacotron2LossTorchscriptTests
,
Tacotron2LossGradcheckTests
,
)
from
torchaudio_unittest.common_utils
import
skipIfNoCuda
,
PytorchTestCase
# Concrete CUDA instantiations of the Tacotron2 loss test mixins.
# Fix: list the test mixin BEFORE PytorchTestCase, matching the CPU
# counterpart classes, so the mixin's methods take MRO precedence.
@skipIfNoCuda
class TestTacotron2LossShapeFloat32CUDA(Tacotron2LossShapeTests, PytorchTestCase):
    dtype = torch.float32
    device = torch.device("cuda")


@skipIfNoCuda
class TestTacotron2TorchsciptFloat32CUDA(Tacotron2LossTorchscriptTests, PytorchTestCase):
    dtype = torch.float32
    device = torch.device("cuda")


@skipIfNoCuda
class TestTacotron2GradcheckFloat64CUDA(Tacotron2LossGradcheckTests, PytorchTestCase):
    dtype = torch.float64  # gradcheck needs a higher numerical accuracy
    device = torch.device("cuda")
test/torchaudio_unittest/example/tacotron2/tacotron2_loss_impl.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torch.autograd
import
gradcheck
,
gradgradcheck
from
pipeline_tacotron2.loss
import
Tacotron2Loss
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
torch_script
,
)
class Tacotron2LossInputMixin(TestBaseMixin):
    """Provides random Tacotron2Loss inputs on this test case's dtype/device."""

    def _get_inputs(self, n_mel=80, n_batch=16, max_mel_specgram_length=300):
        spec_shape = (n_batch, n_mel, max_mel_specgram_length)
        tensor_kwargs = {"dtype": self.dtype, "device": self.device}

        mel_specgram = torch.rand(*spec_shape, **tensor_kwargs)
        mel_specgram_postnet = torch.rand(*spec_shape, **tensor_kwargs)
        gate_out = torch.rand(n_batch, **tensor_kwargs)
        truth_mel_specgram = torch.rand(*spec_shape, **tensor_kwargs)
        truth_gate_out = torch.rand(n_batch, **tensor_kwargs)

        # Ground-truth tensors never participate in gradient computation.
        truth_mel_specgram.requires_grad = False
        truth_gate_out.requires_grad = False
        return (
            mel_specgram,
            mel_specgram_postnet,
            gate_out,
            truth_mel_specgram,
            truth_gate_out,
        )
class Tacotron2LossShapeTests(Tacotron2LossInputMixin):
    def test_tacotron2_loss_shape(self):
        """Validate the output shape of Tacotron2Loss."""
        n_batch = 16
        (
            mel_specgram,
            mel_specgram_postnet,
            gate_out,
            truth_mel_specgram,
            truth_gate_out,
        ) = self._get_inputs(n_batch=n_batch)

        mel_loss, mel_postnet_loss, gate_loss = Tacotron2Loss()(
            (mel_specgram, mel_specgram_postnet, gate_out),
            (truth_mel_specgram, truth_gate_out))

        # All three losses are reduced to scalars.
        scalar = torch.Size([])
        self.assertEqual(mel_loss.size(), scalar)
        self.assertEqual(mel_postnet_loss.size(), scalar)
        self.assertEqual(gate_loss.size(), scalar)
class Tacotron2LossTorchscriptTests(Tacotron2LossInputMixin):
    def _assert_torchscript_consistency(self, fn, tensors):
        # Eager and scripted execution must agree on identical inputs.
        scripted = torch_script(fn)
        eager_output = fn(tensors[:3], tensors[3:])
        scripted_output = scripted(tensors[:3], tensors[3:])
        self.assertEqual(scripted_output, eager_output)

    def test_tacotron2_loss_torchscript_consistency(self):
        """Validate the torchscript consistency of Tacotron2Loss."""
        loss_fn = Tacotron2Loss()
        self._assert_torchscript_consistency(loss_fn, self._get_inputs())
class Tacotron2LossGradcheckTests(Tacotron2LossInputMixin):
    def test_tacotron2_loss_gradcheck(self):
        """Performing gradient check on Tacotron2Loss."""
        (
            mel_specgram,
            mel_specgram_postnet,
            gate_out,
            truth_mel_specgram,
            truth_gate_out,
        ) = self._get_inputs()
        # Only the predicted tensors require gradients.
        mel_specgram.requires_grad_(True)
        mel_specgram_postnet.requires_grad_(True)
        gate_out.requires_grad_(True)

        def _fn(mel_specgram, mel_specgram_postnet, gate_out,
                truth_mel_specgram, truth_gate_out):
            # Construct the loss inside the closure so gradcheck sees a pure function.
            loss_fn = Tacotron2Loss()
            return loss_fn(
                (mel_specgram, mel_specgram_postnet, gate_out),
                (truth_mel_specgram, truth_gate_out),
            )

        args = (
            mel_specgram,
            mel_specgram_postnet,
            gate_out,
            truth_mel_specgram,
            truth_gate_out,
        )
        gradcheck(_fn, args, fast_mode=True)
        gradgradcheck(_fn, args, fast_mode=True)
test/torchaudio_unittest/example/tacotron2/test_text_preprocessing.py
0 → 100644
View file @
9dcc7a15
from
parameterized
import
parameterized
from
torchaudio._internal.module_utils
import
is_module_available
from
torchaudio_unittest.common_utils
import
TorchaudioTestCase
,
skipIfNoModule
if
is_module_available
(
"unidecode"
)
and
is_module_available
(
"inflect"
):
from
pipeline_tacotron2.text.text_preprocessing
import
text_to_sequence
from
pipeline_tacotron2.text.numbers
import
(
_remove_commas
,
_expand_pounds
,
_expand_dollars
,
_expand_decimal_point
,
_expand_ordinal
,
_expand_number
,
)
@skipIfNoModule("unidecode")
@skipIfNoModule("inflect")
class TestTextPreprocessor(TorchaudioTestCase):
    """Tests for the Tacotron2 pipeline's text preprocessing helpers.

    Fix: removed a stray ``_expand_ordinal,`` fragment that sat between two
    test methods (a leftover from an import list); evaluating it in the class
    body would raise ``NameError``.
    """

    @parameterized.expand(
        [
            ["dr. Strange?", [15, 26, 14, 31, 26, 29, 11, 30, 31, 29, 12, 25, 18, 16, 10]],
            ["ML, is fun.", [24, 23, 6, 11, 20, 30, 11, 17, 32, 25, 7]],
            ["I love torchaudio!",
             [20, 11, 23, 26, 33, 16, 11, 31, 26, 29, 14, 19, 12, 32, 15, 20, 26, 2]],
            # 'one thousand dollars, twenty cents'
            ["$1,000.20",
             [26, 25, 16, 11, 31, 19, 26, 32, 30, 12, 25, 15, 11, 15, 26, 23, 23, 12,
              29, 30, 6, 11, 31, 34, 16, 25, 31, 36, 11, 14, 16, 25, 31, 30]],
        ]
    )
    def test_text_to_sequence(self, sent, seq):
        assert text_to_sequence(sent) == seq

    @parameterized.expand(
        [
            ["He, she, and I have $1,000", "He, she, and I have $1000"],
        ]
    )
    def test_remove_commas(self, sent, truth):
        assert _remove_commas(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have £1000", "He, she, and I have 1000 pounds"],
        ]
    )
    def test_expand_pounds(self, sent, truth):
        assert _expand_pounds(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have $1000", "He, she, and I have 1000 dollars"],
            ["He, she, and I have $3000.01", "He, she, and I have 3000 dollars, 1 cent"],
            ["He has $500.20 and she has $1000.50.",
             "He has 500 dollars, 20 cents and she has 1000 dollars, 50 cents."],
        ]
    )
    def test_expand_dollars(self, sent, truth):
        assert _expand_dollars(sent) == truth

    @parameterized.expand(
        [
            ["1000.20", "1000 point 20"],
            ["1000.1", "1000 point 1"],
        ]
    )
    def test_expand_decimal_point(self, sent, truth):
        assert _expand_decimal_point(sent) == truth

    @parameterized.expand(
        [
            ["21st centry", "twenty-first centry"],
            ["20th centry", "twentieth centry"],
            ["2nd place.", "second place."],
        ]
    )
    def test_expand_ordinal(self, sent, truth):
        assert _expand_ordinal(sent) == truth

    @parameterized.expand(
        [
            ["100020 dollars.", "one hundred thousand twenty dollars."],
            ["1234567890!", "one billion, two hundred thirty-four million, "
             "five hundred sixty-seven thousand, eight hundred ninety!"],
        ]
    )
    def test_expand_number(self, sent, truth):
        assert _expand_number(sent) == truth
test/torchaudio_unittest/functional/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/functional/autograd_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
.autograd_impl
import
Autograd
,
AutogradFloat32
from
torchaudio_unittest
import
common_utils
# Concrete CPU instantiations of the autograd test mixins.
class TestAutogradLfilterCPU(Autograd, common_utils.PytorchTestCase):
    dtype = torch.float64
    device = torch.device('cpu')


class TestAutogradRNNTCPU(AutogradFloat32, common_utils.PytorchTestCase):
    dtype = torch.float32
    device = torch.device('cpu')
test/torchaudio_unittest/functional/autograd_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
.autograd_impl
import
Autograd
,
AutogradFloat32
from
torchaudio_unittest
import
common_utils
# Concrete CUDA instantiations of the autograd test mixins.
@common_utils.skipIfNoCuda
class TestAutogradLfilterCUDA(Autograd, common_utils.PytorchTestCase):
    dtype = torch.float64
    device = torch.device('cuda')


@common_utils.skipIfNoCuda
class TestAutogradRNNTCUDA(AutogradFloat32, common_utils.PytorchTestCase):
    dtype = torch.float32
    device = torch.device('cuda')
test/torchaudio_unittest/functional/autograd_impl.py
0 → 100644
View file @
9dcc7a15
from
typing
import
Callable
,
Tuple
from
functools
import
partial
import
torch
from
parameterized
import
parameterized
from
torch
import
Tensor
import
torchaudio.functional
as
F
from
torch.autograd
import
gradcheck
,
gradgradcheck
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
get_whitenoise
,
rnnt_utils
,
)
class Autograd(TestBaseMixin):
    """First- and second-order gradient checks for filtering functionals.

    Intended to be run in float64 (see the concrete CPU/CUDA test classes).
    """

    def assert_grad(
            self,
            transform: Callable[..., Tensor],
            inputs: Tuple[torch.Tensor],
            *,
            enable_all_grad: bool = True,
    ):
        """Run gradcheck and gradgradcheck on ``transform``.

        Tensor inputs are first moved to this case's dtype/device; when
        ``enable_all_grad`` is set, every tensor input is marked as
        requiring grad.
        """
        prepared = []
        for item in inputs:
            if torch.is_tensor(item):
                item = item.to(dtype=self.dtype, device=self.device)
                if enable_all_grad:
                    item.requires_grad = True
            prepared.append(item)
        assert gradcheck(transform, prepared)
        assert gradgradcheck(transform, prepared)

    def _lfilter_inputs(self, n_channels=2):
        # Deterministic noise plus the 1-D (single-filter) coefficients
        # shared by the lfilter/filtfilt/biquad tests.
        torch.random.manual_seed(2434)
        x = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=n_channels)
        a = torch.tensor([0.7, 0.2, 0.6])
        b = torch.tensor([0.4, 0.2, 0.9])
        return x, a, b

    def _filterbank_inputs(self, n_channels):
        # Deterministic noise plus stacked (two-filter) coefficient banks.
        torch.random.manual_seed(2434)
        x = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=n_channels)
        a = torch.tensor([[0.7, 0.2, 0.6], [0.8, 0.2, 0.9]])
        b = torch.tensor([[0.4, 0.2, 0.9], [0.7, 0.2, 0.6]])
        return x, a, b

    def _biquad_input(self):
        # Deterministic mono noise and its sample rate for the *_biquad tests.
        torch.random.manual_seed(2434)
        sr = 22050
        x = get_whitenoise(sample_rate=sr, duration=0.01, n_channels=1)
        return x, sr

    def test_lfilter_x(self):
        x, a, b = self._lfilter_inputs()
        x.requires_grad = True
        self.assert_grad(F.lfilter, (x, a, b), enable_all_grad=False)

    def test_lfilter_a(self):
        x, a, b = self._lfilter_inputs()
        a.requires_grad = True
        self.assert_grad(F.lfilter, (x, a, b), enable_all_grad=False)

    def test_lfilter_b(self):
        x, a, b = self._lfilter_inputs()
        b.requires_grad = True
        self.assert_grad(F.lfilter, (x, a, b), enable_all_grad=False)

    def test_lfilter_all_inputs(self):
        x, a, b = self._lfilter_inputs()
        self.assert_grad(F.lfilter, (x, a, b))

    def test_lfilter_filterbanks(self):
        x, a, b = self._filterbank_inputs(n_channels=3)
        self.assert_grad(partial(F.lfilter, batching=False), (x, a, b))

    def test_lfilter_batching(self):
        x, a, b = self._filterbank_inputs(n_channels=2)
        self.assert_grad(F.lfilter, (x, a, b))

    def test_filtfilt_a(self):
        x, a, b = self._lfilter_inputs()
        a.requires_grad = True
        self.assert_grad(F.filtfilt, (x, a, b), enable_all_grad=False)

    def test_filtfilt_b(self):
        x, a, b = self._lfilter_inputs()
        b.requires_grad = True
        self.assert_grad(F.filtfilt, (x, a, b), enable_all_grad=False)

    def test_filtfilt_all_inputs(self):
        x, a, b = self._lfilter_inputs()
        self.assert_grad(F.filtfilt, (x, a, b))

    def test_filtfilt_batching(self):
        x, a, b = self._filterbank_inputs(n_channels=2)
        self.assert_grad(F.filtfilt, (x, a, b))

    def test_biquad(self):
        x, a, b = self._lfilter_inputs(n_channels=1)
        self.assert_grad(F.biquad, (x, b[0], b[1], b[2], a[0], a[1], a[2]))

    @parameterized.expand([
        (800, 0.7, True),
        (800, 0.7, False),
    ])
    def test_band_biquad(self, central_freq, Q, noise):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.band_biquad, (x, sr, central_freq, Q, noise))

    @parameterized.expand([
        (800, 0.7, 10),
        (800, 0.7, -10),
    ])
    def test_bass_biquad(self, central_freq, Q, gain):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        gain = torch.tensor(gain)
        self.assert_grad(F.bass_biquad, (x, sr, gain, central_freq, Q))

    @parameterized.expand([
        (3000, 0.7, 10),
        (3000, 0.7, -10),
    ])
    def test_treble_biquad(self, central_freq, Q, gain):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        gain = torch.tensor(gain)
        self.assert_grad(F.treble_biquad, (x, sr, gain, central_freq, Q))

    @parameterized.expand([
        (800, 0.7, ),
    ])
    def test_allpass_biquad(self, central_freq, Q):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.allpass_biquad, (x, sr, central_freq, Q))

    @parameterized.expand([
        (800, 0.7, ),
    ])
    def test_lowpass_biquad(self, cutoff_freq, Q):
        x, sr = self._biquad_input()
        cutoff_freq = torch.tensor(cutoff_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.lowpass_biquad, (x, sr, cutoff_freq, Q))

    @parameterized.expand([
        (800, 0.7, ),
    ])
    def test_highpass_biquad(self, cutoff_freq, Q):
        x, sr = self._biquad_input()
        cutoff_freq = torch.tensor(cutoff_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.highpass_biquad, (x, sr, cutoff_freq, Q))

    @parameterized.expand([
        (800, 0.7, True),
        (800, 0.7, False),
    ])
    def test_bandpass_biquad(self, central_freq, Q, const_skirt_gain):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.bandpass_biquad, (x, sr, central_freq, Q, const_skirt_gain))

    @parameterized.expand([
        (800, 0.7, 10),
        (800, 0.7, -10),
    ])
    def test_equalizer_biquad(self, central_freq, Q, gain):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        gain = torch.tensor(gain)
        self.assert_grad(F.equalizer_biquad, (x, sr, central_freq, gain, Q))

    @parameterized.expand([
        (800, 0.7, ),
    ])
    def test_bandreject_biquad(self, central_freq, Q):
        x, sr = self._biquad_input()
        central_freq = torch.tensor(central_freq)
        Q = torch.tensor(Q)
        self.assert_grad(F.bandreject_biquad, (x, sr, central_freq, Q))
class AutogradFloat32(TestBaseMixin):
    """First-order gradient checks for float32 functionals (e.g. rnnt_loss)."""

    def assert_grad(
            self,
            transform: Callable[..., Tensor],
            inputs: Tuple[torch.Tensor],
            enable_all_grad: bool = True,
    ):
        """Run gradcheck on ``transform`` with inputs cast to this case's
        dtype/device.

        Only first-order gradients are checked, with tolerances relaxed
        for float32.
        """
        inputs_ = []
        for i in inputs:
            if torch.is_tensor(i):
                i = i.to(dtype=self.dtype, device=self.device)
                if enable_all_grad:
                    i.requires_grad = True
            inputs_.append(i)
        # gradcheck with float32 requires higher atol and epsilon
        # Fix: pass the prepared `inputs_` (dtype/device-converted, grad-enabled),
        # not the raw `inputs` — otherwise the conversion above is silently unused.
        assert gradcheck(transform, inputs_, eps=1e-3, atol=1e-3, nondet_tol=0.)

    @parameterized.expand([
        (rnnt_utils.get_B1_T10_U3_D4_data, ),
        (rnnt_utils.get_B2_T4_U3_D3_data, ),
        (rnnt_utils.get_B1_T2_U3_D5_data, ),
    ])
    def test_rnnt_loss(self, data_func):
        """rnnt_loss must be differentiable w.r.t. its float32 logits."""
        def get_data(data_func, device):
            data = data_func()
            if type(data) == tuple:
                data = data[0]
            return data

        data = get_data(data_func, self.device)
        inputs = (
            data["logits"].to(torch.float32),  # logits
            data["targets"],  # targets
            data["logit_lengths"],  # logit_lengths
            data["target_lengths"],  # target_lengths
            data["blank"],  # blank
            -1,  # clamp
        )
        self.assert_grad(F.rnnt_loss, inputs, enable_all_grad=False)
test/torchaudio_unittest/functional/batch_consistency_test.py
0 → 100644
View file @
9dcc7a15
"""Test numerical consistency among single input and batched input."""
import
itertools
import
math
from
parameterized
import
parameterized
,
parameterized_class
import
torch
import
torchaudio.functional
as
F
from
torchaudio_unittest
import
common_utils
def
_name_from_args
(
func
,
_
,
params
):
"""Return a parameterized test name, based on parameter values."""
return
"{}_{}"
.
format
(
func
.
__name__
,
"_"
.
join
(
str
(
arg
)
for
arg
in
params
.
args
))
@parameterized_class([
    # Single-item batch isolates problems that come purely from adding a
    # dimension (rather than processing multiple items)
    {"batch_size": 1},
    {"batch_size": 3},
])
class TestFunctional(common_utils.TorchaudioTestCase):
    """Test functions defined in `functional` module"""
    backend = 'default'

    def assert_batch_consistency(
            self, functional, batch, *args, atol=1e-8, rtol=1e-5, seed=42,
            **kwargs):
        """Assert that applying ``functional`` to a whole batch gives the same
        result as applying it to each item separately, and that neither call
        mutates its input.

        The RNG is re-seeded identically before the itemwise and the batched
        run so that functionals with internal randomness see the same state.
        """
        n = batch.size(0)

        # Compute items separately, then batch the result
        torch.random.manual_seed(seed)
        items_input = batch.clone()
        items_result = torch.stack([
            functional(items_input[i], *args, **kwargs) for i in range(n)
        ])

        # Batch the input and run
        torch.random.manual_seed(seed)
        batch_input = batch.clone()
        batch_result = functional(batch_input, *args, **kwargs)

        # Inputs must be untouched; outputs must match.
        self.assertEqual(items_input, batch_input, rtol=rtol, atol=atol)
        self.assertEqual(items_result, batch_result, rtol=rtol, atol=atol)

    def test_griffinlim(self):
        """Batched Griffin-Lim reconstruction matches per-item reconstruction."""
        n_fft = 400
        ws = 400
        hop = 200
        window = torch.hann_window(ws)
        power = 2
        momentum = 0.99
        n_iter = 32
        length = 1000
        torch.random.manual_seed(0)
        batch = torch.rand(self.batch_size, 1, 201, 6)
        # Trailing 0 is the `rand_init` positional argument — presumably
        # disables random phase init; TODO confirm against F.griffinlim.
        self.assert_batch_consistency(
            F.griffinlim, batch, window, n_fft, hop, ws, power, n_iter,
            momentum, length, 0, atol=5e-5)

    @parameterized.expand(list(itertools.product(
        [8000, 16000, 44100],
        [1, 2],
    )), name_func=_name_from_args)
    def test_detect_pitch_frequency(self, sample_rate, n_channels):
        # Use different frequencies to ensure each item in the batch returns a
        # different answer.
        torch.manual_seed(0)
        frequencies = torch.randint(100, 1000, [self.batch_size])
        waveforms = torch.stack([
            common_utils.get_sinusoid(
                frequency=frequency, sample_rate=sample_rate,
                n_channels=n_channels, duration=5)
            for frequency in frequencies
        ])
        self.assert_batch_consistency(
            F.detect_pitch_frequency, waveforms, sample_rate)

    def test_amplitude_to_DB(self):
        """amplitude_to_DB is batch-consistent with and without a top_db clamp."""
        torch.manual_seed(0)
        spec = torch.rand(self.batch_size, 2, 100, 100) * 200
        amplitude_mult = 20.
        amin = 1e-10
        ref = 1.0
        db_mult = math.log10(max(amin, ref))
        # Test with & without a `top_db` clamp
        self.assert_batch_consistency(
            F.amplitude_to_DB, spec, amplitude_mult, amin, db_mult,
            top_db=None)
        self.assert_batch_consistency(
            F.amplitude_to_DB, spec, amplitude_mult, amin, db_mult,
            top_db=40.)

    def test_amplitude_to_DB_itemwise_clamps(self):
        """Ensure that the clamps are separate for each spectrogram in a batch.
        The clamp was determined per-batch in a prior implementation, which
        meant it was determined by the loudest item, thus items weren't
        independent. See:
        https://github.com/pytorch/audio/issues/994
        """
        amplitude_mult = 20.
        amin = 1e-10
        ref = 1.0
        db_mult = math.log10(max(amin, ref))
        top_db = 20.

        # Make a batch of noise
        torch.manual_seed(0)
        spec = torch.rand([2, 2, 100, 100]) * 200
        # Make one item blow out the other
        spec[0] += 50

        batchwise_dbs = F.amplitude_to_DB(
            spec, amplitude_mult, amin, db_mult, top_db=top_db)
        itemwise_dbs = torch.stack([
            F.amplitude_to_DB(item, amplitude_mult, amin, db_mult,
                              top_db=top_db)
            for item in spec
        ])
        self.assertEqual(batchwise_dbs, itemwise_dbs)

    def test_amplitude_to_DB_not_channelwise_clamps(self):
        """Check that clamps are applied per-item, not per channel."""
        amplitude_mult = 20.
        amin = 1e-10
        ref = 1.0
        db_mult = math.log10(max(amin, ref))
        top_db = 40.

        torch.manual_seed(0)
        spec = torch.rand([1, 2, 100, 100]) * 200
        # Make one channel blow out the other
        spec[:, 0] += 50

        specwise_dbs = F.amplitude_to_DB(
            spec, amplitude_mult, amin, db_mult, top_db=top_db)
        channelwise_dbs = torch.stack([
            F.amplitude_to_DB(spec[:, i], amplitude_mult, amin, db_mult,
                              top_db=top_db)
            for i in range(spec.size(-3))
        ])
        # Just check channelwise gives a different answer.
        difference = (specwise_dbs - channelwise_dbs).abs()
        assert (difference >= 1e-5).any()

    def test_contrast(self):
        """F.contrast is batch-consistent on zero-centered random waveforms."""
        torch.random.manual_seed(0)
        waveforms = torch.rand(self.batch_size, 2, 100) - 0.5
        self.assert_batch_consistency(
            F.contrast, waveforms, enhancement_amount=80.)

    def test_dcshift(self):
        """F.dcshift is batch-consistent."""
        torch.random.manual_seed(0)
        waveforms = torch.rand(self.batch_size, 2, 100) - 0.5
        self.assert_batch_consistency(
            F.dcshift, waveforms, shift=0.5, limiter_gain=0.05)

    def test_overdrive(self):
        """F.overdrive is batch-consistent."""
        torch.random.manual_seed(0)
        waveforms = torch.rand(self.batch_size, 2, 100) - 0.5
        self.assert_batch_consistency(
            F.overdrive, waveforms, gain=45, colour=30)

    def test_phaser(self):
        """F.phaser is batch-consistent on reshaped white noise."""
        sample_rate = 44100
        n_channels = 2
        # Generate batch_size*n_channels channels of noise, then fold the
        # channel dimension into (batch, channel, time).
        waveform = common_utils.get_whitenoise(
            sample_rate=sample_rate, n_channels=self.batch_size * n_channels,
            duration=1)
        batch = waveform.view(self.batch_size, n_channels, waveform.size(-1))
        self.assert_batch_consistency(F.phaser, batch, sample_rate)

    def test_flanger(self):
        """F.flanger is batch-consistent."""
        torch.random.manual_seed(0)
        waveforms = torch.rand(self.batch_size, 2, 100) - 0.5
        sample_rate = 44100
        self.assert_batch_consistency(F.flanger, waveforms, sample_rate)

    @parameterized.expand(list(itertools.product(
        [True, False],  # center
        [True, False],  # norm_vars
    )), name_func=_name_from_args)
    def test_sliding_window_cmn(self, center, norm_vars):
        """F.sliding_window_cmn is batch-consistent for all option combinations."""
        torch.manual_seed(0)
        spectrogram = torch.rand(self.batch_size, 2, 1024, 1024) * 200
        self.assert_batch_consistency(
            F.sliding_window_cmn, spectrogram, center=center,
            norm_vars=norm_vars)

    @parameterized.expand([("sinc_interpolation"), ("kaiser_window")])
    def test_resample_waveform(self, resampling_method):
        """F.resample treats leading channel dimension consistently with per-item calls."""
        num_channels = 3
        sr = 16000
        new_sr = sr // 2
        multi_sound = common_utils.get_whitenoise(
            sample_rate=sr, n_channels=num_channels, duration=0.5,)
        self.assert_batch_consistency(
            F.resample, multi_sound, orig_freq=sr, new_freq=new_sr,
            resampling_method=resampling_method, rtol=1e-4, atol=1e-7)

    @common_utils.skipIfNoKaldi
    def test_compute_kaldi_pitch(self):
        """F.compute_kaldi_pitch is batch-consistent (requires Kaldi extension)."""
        sample_rate = 44100
        n_channels = 2
        waveform = common_utils.get_whitenoise(
            sample_rate=sample_rate, n_channels=self.batch_size * n_channels)
        batch = waveform.view(self.batch_size, n_channels, waveform.size(-1))
        self.assert_batch_consistency(
            F.compute_kaldi_pitch, batch, sample_rate=sample_rate)

    def test_lfilter(self):
        """lfilter with batching=True matches per-item filtering with per-item coefficients."""
        signal_length = 2048
        torch.manual_seed(2434)
        x = torch.randn(self.batch_size, signal_length)
        a = torch.rand(self.batch_size, 3)
        b = torch.rand(self.batch_size, 3)

        batchwise_output = F.lfilter(x, a, b, batching=True)
        itemwise_output = torch.stack([
            F.lfilter(x[i], a[i], b[i]) for i in range(self.batch_size)
        ])
        self.assertEqual(batchwise_output, itemwise_output)

    def test_filtfilt(self):
        """filtfilt on a batch matches per-item filtfilt with per-item coefficients."""
        signal_length = 2048
        torch.manual_seed(2434)
        x = torch.randn(self.batch_size, signal_length)
        a = torch.rand(self.batch_size, 3)
        b = torch.rand(self.batch_size, 3)

        batchwise_output = F.filtfilt(x, a, b)
        itemwise_output = torch.stack([
            F.filtfilt(x[i], a[i], b[i]) for i in range(self.batch_size)
        ])
        self.assertEqual(batchwise_output, itemwise_output)
test/torchaudio_unittest/functional/functional_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
import
torchaudio.functional
as
F
import
unittest
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
TorchaudioTestCase
,
skipIfNoSox
from
.functional_impl
import
Functional
,
FunctionalCPUOnly
class TestFunctionalFloat32(Functional, FunctionalCPUOnly, PytorchTestCase):
    """Run the shared functional test suite on CPU with float32 tensors."""
    dtype = torch.float32
    device = torch.device('cpu')

    # Marked as expected failure for float32; the float64 variant runs the
    # same test without the marker — presumably float32 lacks the precision
    # for a stable 9th-order IIR filter (TODO confirm).
    @unittest.expectedFailure
    def test_lfilter_9th_order_filter_stability(self):
        super().test_lfilter_9th_order_filter_stability()
class TestFunctionalFloat64(Functional, PytorchTestCase):
    """Run the shared functional test suite on CPU with float64 tensors."""
    dtype = torch.float64
    device = torch.device('cpu')
@skipIfNoSox
class TestApplyCodec(TorchaudioTestCase):
    """Smoke tests for F.apply_codec across wav/mp3/flac/vorbis formats."""
    backend = "sox_io"

    def _smoke_test(self, format, compression, check_num_frames):
        """
        The purpose of this test suite is to verify that apply_codec functionalities do not exhibit
        abnormal behaviors.
        """
        torch.random.manual_seed(42)
        sample_rate = 8000
        num_frames = 3 * sample_rate
        num_channels = 2
        waveform = torch.rand(num_channels, num_frames)

        augmented = F.apply_codec(waveform,
                                  sample_rate,
                                  format,
                                  True,
                                  compression
                                  )
        # Round-trip through the codec must preserve dtype and channel count;
        # frame count is only checked for lossless formats (lossy codecs may
        # pad/trim).
        assert augmented.dtype == waveform.dtype
        assert augmented.shape[0] == num_channels
        if check_num_frames:
            assert augmented.shape[1] == num_frames

    def test_wave(self):
        """wav is lossless: frame count must survive the round trip."""
        self._smoke_test("wav", compression=None, check_num_frames=True)

    @parameterized.expand([(96,), (128,), (160,), (192,), (224,), (256,), (320,)])
    def test_mp3(self, compression):
        # compression values are mp3 bitrates in kbps
        self._smoke_test("mp3", compression, check_num_frames=False)

    @parameterized.expand([(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,)])
    def test_flac(self, compression):
        # compression values are flac compression levels 0-8
        self._smoke_test("flac", compression, check_num_frames=False)

    @parameterized.expand([(-1,), (0,), (1,), (2,), (3,), (3.6,), (5,), (10,)])
    def test_vorbis(self, compression):
        # compression values are vorbis quality levels (can be fractional/negative)
        self._smoke_test("vorbis", compression, check_num_frames=False)
test/torchaudio_unittest/functional/functional_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
import
unittest
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
.functional_impl
import
Functional
@skipIfNoCuda
class TestFunctionalFloat32(Functional, PytorchTestCase):
    """Run the shared functional test suite on CUDA with float32 tensors."""
    dtype = torch.float32
    device = torch.device('cuda')

    # Same expected failure as the CPU float32 variant — presumably a
    # float32 precision limitation of the 9th-order filter (TODO confirm).
    @unittest.expectedFailure
    def test_lfilter_9th_order_filter_stability(self):
        super().test_lfilter_9th_order_filter_stability()
@skipIfNoCuda
class TestLFilterFloat64(Functional, PytorchTestCase):
    """Run the shared functional test suite on CUDA with float64 tensors."""
    dtype = torch.float64
    device = torch.device('cuda')
test/torchaudio_unittest/functional/functional_impl.py
0 → 100644
View file @
9dcc7a15
"""Test definition common to CPU and CUDA"""
import
math
import
itertools
import
warnings
import
numpy
as
np
import
torch
import
torchaudio.functional
as
F
from
parameterized
import
parameterized
from
scipy
import
signal
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
get_sinusoid
,
nested_params
,
get_whitenoise
,
rnnt_utils
,
)
class Functional(TestBaseMixin):
    """Functional tests shared between CPU and CUDA; `dtype` and `device`
    are supplied by the concrete test-case subclasses."""

    def _test_resample_waveform_accuracy(self, up_scale_factor=None, down_scale_factor=None,
                                         resampling_method="sinc_interpolation",
                                         atol=1e-1, rtol=1e-4):
        """Resample a pure cosine and compare against the analytically known result.

        Exactly one of up_scale_factor / down_scale_factor is expected from
        callers; both None leaves the rate unchanged.
        """
        # resample the signal and compare it to the ground truth
        n_to_trim = 20
        sample_rate = 1000
        new_sample_rate = sample_rate

        if up_scale_factor is not None:
            new_sample_rate = int(new_sample_rate * up_scale_factor)

        if down_scale_factor is not None:
            new_sample_rate = int(new_sample_rate / down_scale_factor)

        duration = 5  # seconds

        original_timestamps = torch.arange(0, duration, 1.0 / sample_rate)
        sound = 123 * torch.cos(2 * math.pi * 3 * original_timestamps).unsqueeze(0)
        estimate = F.resample(sound, sample_rate, new_sample_rate,
                              resampling_method=resampling_method).squeeze()

        new_timestamps = torch.arange(0, duration, 1.0 / new_sample_rate)[:estimate.size(0)]
        ground_truth = 123 * torch.cos(2 * math.pi * 3 * new_timestamps)

        # trim the first/last n samples as these points have boundary effects
        ground_truth = ground_truth[..., n_to_trim:-n_to_trim]
        estimate = estimate[..., n_to_trim:-n_to_trim]

        self.assertEqual(estimate, ground_truth, atol=atol, rtol=rtol)

    def _test_costs_and_gradients(self, data, ref_costs, ref_gradients,
                                  atol=1e-6, rtol=1e-2):
        """Compare RNNT transducer costs/gradients against reference values."""
        logits_shape = data["logits"].shape
        costs, gradients = rnnt_utils.compute_with_pytorch_transducer(data=data)
        self.assertEqual(costs, ref_costs, atol=atol, rtol=rtol)
        # Gradient must have the same shape as the logits it differentiates.
        self.assertEqual(logits_shape, gradients.shape)
        self.assertEqual(gradients, ref_gradients, atol=atol, rtol=rtol)

    def test_lfilter_simple(self):
        """
        Create a very basic signal,
        Then make a simple 4th order delay
        The output should be same as the input but shifted
        """
        torch.random.manual_seed(42)
        waveform = torch.rand(2, 44100 * 1, dtype=self.dtype, device=self.device)
        # b = [0, 0, 0, 1], a = [1, 0, 0, 0] is a pure 3-sample delay.
        b_coeffs = torch.tensor([0, 0, 0, 1], dtype=self.dtype, device=self.device)
        a_coeffs = torch.tensor([1, 0, 0, 0], dtype=self.dtype, device=self.device)
        output_waveform = F.lfilter(waveform, a_coeffs, b_coeffs)

        self.assertEqual(output_waveform[:, 3:], waveform[:, 0:-3], atol=1e-5, rtol=1e-5)

    def test_lfilter_clamp(self):
        """The `clamp` flag bounds the output at 1; without it the IIR response exceeds 1."""
        input_signal = torch.ones(1, 44100 * 1, dtype=self.dtype, device=self.device)
        b_coeffs = torch.tensor([1, 0], dtype=self.dtype, device=self.device)
        a_coeffs = torch.tensor([1, -0.95], dtype=self.dtype, device=self.device)
        output_signal = F.lfilter(input_signal, a_coeffs, b_coeffs, clamp=True)
        assert output_signal.max() <= 1
        output_signal = F.lfilter(input_signal, a_coeffs, b_coeffs, clamp=False)
        assert output_signal.max() > 1

    @parameterized.expand([
        ((44100,), (4,), (44100,)),
        ((3, 44100), (4,), (3, 44100,)),
        ((2, 3, 44100), (4,), (2, 3, 44100,)),
        ((1, 2, 3, 44100), (4,), (1, 2, 3, 44100,)),
        ((44100,), (2, 4), (2, 44100)),
        ((3, 44100), (1, 4), (3, 1, 44100)),
        ((1, 2, 44100), (3, 4), (1, 2, 3, 44100))
    ])
    def test_lfilter_shape(self, input_shape, coeff_shape, target_shape):
        """lfilter (batching=False) broadcasts coefficient sets into a new leading dim."""
        torch.random.manual_seed(42)
        waveform = torch.rand(*input_shape, dtype=self.dtype, device=self.device)
        b_coeffs = torch.rand(*coeff_shape, dtype=self.dtype, device=self.device)
        a_coeffs = torch.rand(*coeff_shape, dtype=self.dtype, device=self.device)
        output_waveform = F.lfilter(waveform, a_coeffs, b_coeffs, batching=False)
        # Input is not resized in place; output shape matches the expected target.
        assert input_shape == waveform.size()
        assert target_shape == output_waveform.size()

    def test_lfilter_9th_order_filter_stability(self):
        """
        Validate the precision of lfilter against reference scipy implementation when using high order filter.
        The reference implementation use cascaded second-order filters so is more numerically accurate.
        """
        # create an impulse signal
        x = torch.zeros(1024, dtype=self.dtype, device=self.device)
        x[0] = 1

        # get target impulse response
        sos = signal.butter(9, 850, 'hp', fs=22050, output='sos')
        y = torch.from_numpy(signal.sosfilt(sos, x.cpu().numpy())).to(self.dtype).to(self.device)

        # get lfilter coefficients
        b, a = signal.butter(9, 850, 'hp', fs=22050, output='ba')
        b, a = torch.from_numpy(b).to(self.dtype).to(self.device), torch.from_numpy(
            a).to(self.dtype).to(self.device)

        # predict impulse response
        yhat = F.lfilter(x, a, b, False)
        self.assertEqual(yhat, y, atol=1e-4, rtol=1e-5)

    def test_filtfilt_simple(self):
        """
        Check that, for an arbitrary signal, applying filtfilt with filter coefficients
        corresponding to a pure delay filter imparts no time delay.
        """
        waveform = get_whitenoise(sample_rate=8000, n_channels=2,
                                  dtype=self.dtype).to(device=self.device)
        b_coeffs = torch.tensor([0, 0, 0, 1], dtype=self.dtype, device=self.device)
        a_coeffs = torch.tensor([1, 0, 0, 0], dtype=self.dtype, device=self.device)
        # Pad with 3 zeros so the forward+backward delays have room to cancel.
        padded_waveform = torch.cat(
            (waveform, torch.zeros(2, 3, dtype=self.dtype, device=self.device)), axis=1)
        output_waveform = F.filtfilt(padded_waveform, a_coeffs, b_coeffs)
        self.assertEqual(output_waveform, padded_waveform, atol=1e-5, rtol=1e-5)

    def test_filtfilt_filter_sinusoid(self):
        """
        Check that, for a signal comprising two sinusoids, applying filtfilt
        with appropriate filter coefficients correctly removes the higher-frequency
        sinusoid while imparting no time delay.
        """
        T = 1.0
        samples = 1000

        waveform_k0 = get_sinusoid(frequency=5, sample_rate=samples // T,
                                   dtype=self.dtype, device=self.device).squeeze(0)
        waveform_k1 = get_sinusoid(
            frequency=200,
            sample_rate=samples // T,
            dtype=self.dtype,
            device=self.device,
        ).squeeze(0)
        waveform = waveform_k0 + waveform_k1

        # Transfer function numerator and denominator polynomial coefficients
        # corresponding to 8th-order Butterworth filter with 100-cycle/T cutoff.
        # Generated with
        # >>> from scipy import signal
        # >>> b_coeffs, a_coeffs = signal.butter(8, 0.2)
        b_coeffs = torch.tensor(
            [
                2.39596441e-05,
                1.91677153e-04,
                6.70870035e-04,
                1.34174007e-03,
                1.67717509e-03,
                1.34174007e-03,
                6.70870035e-04,
                1.91677153e-04,
                2.39596441e-05,
            ],
            dtype=self.dtype,
            device=self.device,
        )
        a_coeffs = torch.tensor(
            [
                1.0,
                -4.78451489,
                10.44504107,
                -13.45771989,
                11.12933104,
                -6.0252604,
                2.0792738,
                -0.41721716,
                0.0372001,
            ],
            dtype=self.dtype,
            device=self.device,
        )

        # Extend waveform in each direction, preserving periodicity.
        padded_waveform = torch.cat((waveform[:-1], waveform, waveform[1:]))

        output_waveform = F.filtfilt(padded_waveform, a_coeffs, b_coeffs)

        # Remove padding from output waveform; confirm that result
        # closely matches waveform_k0.
        self.assertEqual(
            output_waveform[samples - 1:2 * samples - 1],
            waveform_k0,
            atol=1e-3,
            rtol=1e-3,
        )

    @parameterized.expand([(0.,), (1.,), (2.,), (3.,)])
    def test_spectogram_grad_at_zero(self, power):
        """The gradient of power spectrogram should not be nan but zero near x=0
        https://github.com/pytorch/audio/issues/993
        """
        x = torch.zeros(1, 22050, requires_grad=True)
        spec = F.spectrogram(
            x,
            pad=0,
            window=None,
            n_fft=2048,
            hop_length=None,
            win_length=None,
            power=power,
            normalized=False,
        )
        spec.sum().backward()
        assert not x.grad.isnan().sum()

    def test_compute_deltas_one_channel(self):
        """compute_deltas with win_length=3 on a linear ramp gives known deltas."""
        specgram = torch.tensor([[[1.0, 2.0, 3.0, 4.0]]],
                                dtype=self.dtype, device=self.device)
        expected = torch.tensor([[[0.5, 1.0, 1.0, 0.5]]],
                                dtype=self.dtype, device=self.device)
        computed = F.compute_deltas(specgram, win_length=3)
        self.assertEqual(computed, expected)

    def test_compute_deltas_two_channels(self):
        """compute_deltas handles each channel independently and identically."""
        specgram = torch.tensor([[[1.0, 2.0, 3.0, 4.0],
                                  [1.0, 2.0, 3.0, 4.0]]],
                                dtype=self.dtype, device=self.device)
        expected = torch.tensor([[[0.5, 1.0, 1.0, 0.5],
                                  [0.5, 1.0, 1.0, 0.5]]],
                                dtype=self.dtype, device=self.device)
        computed = F.compute_deltas(specgram, win_length=3)
        self.assertEqual(computed, expected)

    @parameterized.expand([(100,), (440,)])
    def test_detect_pitch_frequency_pitch(self, frequency):
        """Detected pitch of a pure sinusoid stays within 1 Hz of the true frequency."""
        sample_rate = 44100
        test_sine_waveform = get_sinusoid(
            frequency=frequency, sample_rate=sample_rate, duration=5)
        freq = F.detect_pitch_frequency(test_sine_waveform, sample_rate)

        threshold = 1
        s = ((freq - frequency).abs() > threshold).sum()
        self.assertFalse(s)

    @parameterized.expand([([100, 100],), ([2, 100, 100],), ([2, 2, 100, 100],)])
    def test_amplitude_to_DB_reversible(self, shape):
        """Round trip between amplitude and db should return the original for various shape
        This implicitly also tests `DB_to_amplitude`.
        """
        amplitude_mult = 20.
        power_mult = 10.
        amin = 1e-10
        ref = 1.0
        db_mult = math.log10(max(amin, ref))

        torch.manual_seed(0)
        spec = torch.rand(*shape, dtype=self.dtype, device=self.device) * 200

        # Spectrogram amplitude -> DB -> amplitude
        db = F.amplitude_to_DB(spec, amplitude_mult, amin, db_mult, top_db=None)
        x2 = F.DB_to_amplitude(db, ref, 0.5)
        self.assertEqual(x2, spec, atol=5e-5, rtol=1e-5)

        # Spectrogram power -> DB -> power
        db = F.amplitude_to_DB(spec, power_mult, amin, db_mult, top_db=None)
        x2 = F.DB_to_amplitude(db, ref, 1.)
        self.assertEqual(x2, spec)

    @parameterized.expand([([100, 100],), ([2, 100, 100],), ([2, 2, 100, 100],)])
    def test_amplitude_to_DB_top_db_clamp(self, shape):
        """Ensure values are properly clamped when `top_db` is supplied."""
        amplitude_mult = 20.
        amin = 1e-10
        ref = 1.0
        db_mult = math.log10(max(amin, ref))
        top_db = 40.

        torch.manual_seed(0)
        # A random tensor is used for increased entropy, but the max and min for
        # each spectrogram still need to be predictable. The max determines the
        # decibel cutoff, and the distance from the min must be large enough
        # that it triggers a clamp.
        spec = torch.rand(*shape, dtype=self.dtype, device=self.device)
        # Ensure each spectrogram has a min of 0 and a max of 1.
        spec -= spec.amin([-2, -1])[..., None, None]
        spec /= spec.amax([-2, -1])[..., None, None]
        # Expand the range to (0, 200) - wide enough to properly test clamping.
        spec *= 200

        decibels = F.amplitude_to_DB(spec, amplitude_mult, amin,
                                     db_mult, top_db=top_db)
        # Ensure the clamp was applied
        below_limit = decibels < 6.0205
        assert not below_limit.any(), (
            "{} decibel values were below the expected cutoff:\n{}".format(
                below_limit.sum().item(), decibels
            )
        )
        # Ensure it didn't over-clamp
        close_to_limit = decibels < 6.0207
        assert close_to_limit.any(), (
            f"No values were close to the limit. Did it over-clamp?\n{decibels}"
        )

    @parameterized.expand(
        list(itertools.product([(1, 2, 1025, 400, 2), (1025, 400, 2)], [1, 2, 0.7]))
    )
    def test_complex_norm(self, shape, power):
        """complex_norm matches the manual (re^2 + im^2)^(p/2) computation."""
        torch.random.manual_seed(42)
        complex_tensor = torch.randn(*shape, dtype=self.dtype, device=self.device)
        expected_norm_tensor = complex_tensor.pow(2).sum(-1).pow(power / 2)
        norm_tensor = F.complex_norm(complex_tensor, power)
        self.assertEqual(norm_tensor, expected_norm_tensor, atol=1e-5, rtol=1e-5)

    @parameterized.expand(
        list(itertools.product([(2, 1025, 400), (1, 201, 100)], [100], [0., 30.], [1, 2]))
    )
    def test_mask_along_axis(self, shape, mask_param, mask_value, axis):
        """mask_along_axis keeps the shape and masks fewer than mask_param columns."""
        torch.random.manual_seed(42)
        specgram = torch.randn(*shape, dtype=self.dtype, device=self.device)
        mask_specgram = F.mask_along_axis(specgram, mask_param, mask_value, axis)

        other_axis = 1 if axis == 2 else 2

        # A column is counted as masked only if every element along the other
        # axis equals the mask value.
        masked_columns = (mask_specgram == mask_value).sum(other_axis)
        num_masked_columns = (masked_columns == mask_specgram.size(other_axis)).sum()
        num_masked_columns = torch.div(
            num_masked_columns, mask_specgram.size(0), rounding_mode='floor')

        assert mask_specgram.size() == specgram.size()
        assert num_masked_columns < mask_param

    @parameterized.expand(list(itertools.product([100], [0., 30.], [2, 3])))
    def test_mask_along_axis_iid(self, mask_param, mask_value, axis):
        """mask_along_axis_iid masks each item independently within the bound."""
        torch.random.manual_seed(42)
        specgrams = torch.randn(4, 2, 1025, 400, dtype=self.dtype, device=self.device)

        mask_specgrams = F.mask_along_axis_iid(specgrams, mask_param, mask_value, axis)

        other_axis = 2 if axis == 3 else 3

        masked_columns = (mask_specgrams == mask_value).sum(other_axis)
        num_masked_columns = (masked_columns == mask_specgrams.size(other_axis)).sum(-1)

        assert mask_specgrams.size() == specgrams.size()
        # Every (batch, channel) slice must mask fewer than mask_param columns.
        assert (num_masked_columns < mask_param).sum() == num_masked_columns.numel()

    @parameterized.expand(
        list(itertools.product([(2, 1025, 400), (1, 201, 100)], [100], [0., 30.], [1, 2]))
    )
    def test_mask_along_axis_preserve(self, shape, mask_param, mask_value, axis):
        """mask_along_axis should not alter original input Tensor
        Test is run 5 times to bound the probability of no masking occurring to 1e-10
        See https://github.com/pytorch/audio/issues/1478
        """
        torch.random.manual_seed(42)
        for _ in range(5):
            specgram = torch.randn(*shape, dtype=self.dtype, device=self.device)
            specgram_copy = specgram.clone()
            F.mask_along_axis(specgram, mask_param, mask_value, axis)

            self.assertEqual(specgram, specgram_copy)

    @parameterized.expand(list(itertools.product([100], [0., 30.], [2, 3])))
    def test_mask_along_axis_iid_preserve(self, mask_param, mask_value, axis):
        """mask_along_axis_iid should not alter original input Tensor
        Test is run 5 times to bound the probability of no masking occurring to 1e-10
        See https://github.com/pytorch/audio/issues/1478
        """
        torch.random.manual_seed(42)
        for _ in range(5):
            specgrams = torch.randn(4, 2, 1025, 400, dtype=self.dtype, device=self.device)
            specgrams_copy = specgrams.clone()
            F.mask_along_axis_iid(specgrams, mask_param, mask_value, axis)

            self.assertEqual(specgrams, specgrams_copy)

    @parameterized.expand(list(itertools.product(
        ["sinc_interpolation", "kaiser_window"],
        [16000, 44100],
    )))
    def test_resample_identity(self, resampling_method, sample_rate):
        """Resampling to the same rate returns the waveform unchanged."""
        waveform = get_whitenoise(sample_rate=sample_rate, duration=1)

        resampled = F.resample(waveform, sample_rate, sample_rate)
        self.assertEqual(waveform, resampled)

    @parameterized.expand([("sinc_interpolation"), ("kaiser_window")])
    def test_resample_waveform_upsample_size(self, resampling_method):
        """Doubling the sample rate doubles the number of samples."""
        sr = 16000
        waveform = get_whitenoise(sample_rate=sr, duration=0.5,)
        upsampled = F.resample(waveform, sr, sr * 2,
                               resampling_method=resampling_method)
        assert upsampled.size(-1) == waveform.size(-1) * 2

    @parameterized.expand([("sinc_interpolation"), ("kaiser_window")])
    def test_resample_waveform_downsample_size(self, resampling_method):
        """Halving the sample rate halves the number of samples."""
        sr = 16000
        waveform = get_whitenoise(sample_rate=sr, duration=0.5,)
        downsampled = F.resample(waveform, sr, sr // 2,
                                 resampling_method=resampling_method)
        assert downsampled.size(-1) == waveform.size(-1) // 2

    @parameterized.expand([("sinc_interpolation"), ("kaiser_window")])
    def test_resample_waveform_identity_size(self, resampling_method):
        """Same-rate resampling preserves the number of samples."""
        sr = 16000
        waveform = get_whitenoise(sample_rate=sr, duration=0.5,)
        resampled = F.resample(waveform, sr, sr,
                               resampling_method=resampling_method)
        assert resampled.size(-1) == waveform.size(-1)

    @parameterized.expand(list(itertools.product(
        ["sinc_interpolation", "kaiser_window"],
        list(range(1, 20)),
    )))
    def test_resample_waveform_downsample_accuracy(self, resampling_method, i):
        self._test_resample_waveform_accuracy(
            down_scale_factor=i * 2, resampling_method=resampling_method)

    @parameterized.expand(list(itertools.product(
        ["sinc_interpolation", "kaiser_window"],
        list(range(1, 20)),
    )))
    def test_resample_waveform_upsample_accuracy(self, resampling_method, i):
        self._test_resample_waveform_accuracy(
            up_scale_factor=1.0 + i / 20.0, resampling_method=resampling_method)

    @nested_params(
        [0.5, 1.01, 1.3],
        [True, False],
    )
    def test_phase_vocoder_shape(self, rate, test_pseudo_complex):
        """Verify the output shape of phase vocoder"""
        hop_length = 256
        num_freq = 1025
        num_frames = 400
        batch_size = 2

        torch.random.manual_seed(42)
        spec = torch.randn(
            batch_size, num_freq, num_frames,
            dtype=self.complex_dtype, device=self.device)
        if test_pseudo_complex:
            # Pseudo-complex representation: trailing dim of size 2 (re, im).
            spec = torch.view_as_real(spec)

        phase_advance = torch.linspace(
            0, np.pi * hop_length, num_freq,
            dtype=self.dtype, device=self.device)[..., None]

        spec_stretch = F.phase_vocoder(spec, rate=rate, phase_advance=phase_advance)

        assert spec.dim() == spec_stretch.dim()
        # Time axis is stretched by 1/rate, rounded up.
        expected_shape = torch.Size([batch_size, num_freq, int(np.ceil(num_frames / rate))])
        output_shape = (torch.view_as_complex(spec_stretch)
                        if test_pseudo_complex else spec_stretch).shape
        assert output_shape == expected_shape

    @parameterized.expand(
        [
            # words
            ["", "", 0],  # equal
            ["abc", "abc", 0],
            ["ᑌᑎIᑕO", "ᑌᑎIᑕO", 0],

            ["abc", "", 3],  # deletion
            ["aa", "aaa", 1],
            ["aaa", "aa", 1],
            ["ᑌᑎI", "ᑌᑎIᑕO", 2],

            ["aaa", "aba", 1],  # substitution
            ["aba", "aaa", 1],
            ["aba", "   ", 3],

            ["abc", "bcd", 2],  # mix deletion and substitution
            ["0ᑌᑎI", "ᑌᑎIᑕO", 3],

            # sentences
            [["hello", "", "Tᕮ᙭T"], ["hello", "", "Tᕮ᙭T"], 0],  # equal
            [[], [], 0],

            [["hello", "world"], ["hello", "world", "!"], 1],  # deletion
            [["hello", "world"], ["world"], 1],
            [["hello", "world"], [], 2],

            [["Tᕮ᙭T", ], ["world"], 1],  # substitution
            [["Tᕮ᙭T", "XD"], ["world", "hello"], 2],
            [["", "XD"], ["world", ""], 2],
            ["aba", "   ", 3],

            [["hello", "world"], ["world", "hello", "!"], 2],  # mix deletion and substitution
            [["Tᕮ᙭T", "world", "LOL", "XD"], ["world", "hello", "ʕ•́ᴥ•̀ʔっ"], 3],
        ]
    )
    def test_simple_case_edit_distance(self, seq1, seq2, distance):
        # Edit distance is symmetric, so both argument orders must agree.
        assert F.edit_distance(seq1, seq2) == distance
        assert F.edit_distance(seq2, seq1) == distance

    @nested_params(
        [-4, -2, 0, 2, 4],
    )
    def test_pitch_shift_shape(self, n_steps):
        """pitch_shift preserves the waveform shape for up/down/zero shifts."""
        sample_rate = 16000
        torch.random.manual_seed(42)
        waveform = torch.rand(2, 44100 * 1, dtype=self.dtype, device=self.device)
        waveform_shift = F.pitch_shift(waveform, sample_rate, n_steps)
        assert waveform.size() == waveform_shift.size()

    def test_rnnt_loss_basic_backward(self):
        """rnnt_loss forward+backward runs without error on basic data."""
        logits, targets, logit_lengths, target_lengths = rnnt_utils.get_basic_data(self.device)
        loss = F.rnnt_loss(logits, targets, logit_lengths, target_lengths)
        loss.backward()

    def test_rnnt_loss_basic_forward_no_grad(self):
        """In early stage, calls to `rnnt_loss` resulted in segmentation fault when
        `logits` have `requires_grad = False`. This test makes sure that this no longer
        occurs and the functional call runs without error.

        See https://github.com/pytorch/audio/pull/1707
        """
        logits, targets, logit_lengths, target_lengths = rnnt_utils.get_basic_data(self.device)
        logits.requires_grad_(False)
        F.rnnt_loss(logits, targets, logit_lengths, target_lengths)

    @parameterized.expand([
        (rnnt_utils.get_B1_T2_U3_D5_data, torch.float32, 1e-6, 1e-2),
        (rnnt_utils.get_B2_T4_U3_D3_data, torch.float32, 1e-6, 1e-2),
        (rnnt_utils.get_B1_T2_U3_D5_data, torch.float16, 1e-3, 1e-2),
        (rnnt_utils.get_B2_T4_U3_D3_data, torch.float16, 1e-3, 1e-2),
    ])
    def test_rnnt_loss_costs_and_gradients(self, data_func, dtype, atol, rtol):
        """rnnt_loss costs/gradients match hand-computed reference fixtures."""
        data, ref_costs, ref_gradients = data_func(
            dtype=dtype,
            device=self.device,
        )
        self._test_costs_and_gradients(
            data=data,
            ref_costs=ref_costs,
            ref_gradients=ref_gradients,
            atol=atol,
            rtol=rtol,
        )

    def test_rnnt_loss_costs_and_gradients_random_data_with_numpy_fp32(self):
        """rnnt_loss matches a numpy reference transducer on random data."""
        seed = 777
        for i in range(5):
            data = rnnt_utils.get_random_data(
                dtype=torch.float32, device=self.device, seed=(seed + i))
            ref_costs, ref_gradients = rnnt_utils.compute_with_numpy_transducer(data=data)
            self._test_costs_and_gradients(
                data=data, ref_costs=ref_costs, ref_gradients=ref_gradients)
class FunctionalCPUOnly(TestBaseMixin):
    """Functional tests that only make sense on CPU (warning behavior checks)."""

    def test_melscale_fbanks_no_warning_high_n_freq(self):
        """Large n_freqs with a normal mel count should emit no warning."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            F.melscale_fbanks(288, 0, 8000, 128, 16000)
        assert len(w) == 0

    def test_melscale_fbanks_no_warning_low_n_mels(self):
        """A mel count low enough for the given n_freqs should emit no warning."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            F.melscale_fbanks(201, 0, 8000, 89, 16000)
        assert len(w) == 0

    def test_melscale_fbanks_warning(self):
        """Exactly one warning is expected for this n_freqs/n_mels combination
        (presumably because some mel filters end up empty — TODO confirm)."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            F.melscale_fbanks(201, 0, 8000, 128, 16000)
        assert len(w) == 1
test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.kaldi_compatibility_test_impl
import
Kaldi
,
KaldiCPUOnly
class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase):
    """Run the CPU-only Kaldi compatibility suite with float32 on CPU."""
    dtype = torch.float32
    device = torch.device('cpu')
class TestKaldiFloat32(Kaldi, PytorchTestCase):
    """Run the Kaldi compatibility suite with float32 on CPU."""
    dtype = torch.float32
    device = torch.device('cpu')
class TestKaldiFloat64(Kaldi, PytorchTestCase):
    """Run the Kaldi compatibility suite with float64 on CPU."""
    dtype = torch.float64
    device = torch.device('cpu')
test/torchaudio_unittest/functional/kaldi_compatibility_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
.kaldi_compatibility_test_impl
import
Kaldi
@skipIfNoCuda
class TestKaldiFloat32(Kaldi, PytorchTestCase):
    """Run the Kaldi compatibility suite with float32 on CUDA."""
    dtype = torch.float32
    device = torch.device('cuda')
@skipIfNoCuda
class TestKaldiFloat64(Kaldi, PytorchTestCase):
    """Kaldi compatibility suite, float64 on CUDA (skipped without a GPU)."""
    device = torch.device('cuda')
    dtype = torch.float64
test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py
0 → 100644
View file @
9dcc7a15
from
parameterized
import
parameterized
import
torch
import
torchaudio.functional
as
F
from
torchaudio_unittest.common_utils
import
(
get_sinusoid
,
load_params
,
save_wav
,
skipIfNoExec
,
TempDirMixin
,
TestBaseMixin
,
)
from
torchaudio_unittest.common_utils.kaldi_utils
import
(
convert_args
,
run_kaldi
,
)
class Kaldi(TempDirMixin, TestBaseMixin):
    """Compare torchaudio.functional outputs against Kaldi command-line tools."""

    def assert_equal(self, output, *, expected, rtol=None, atol=None):
        """Assert ``output`` matches ``expected`` after casting the reference
        to this test's dtype and device."""
        reference = expected.to(dtype=self.dtype, device=self.device)
        self.assertEqual(output, reference, rtol=rtol, atol=atol)

    @skipIfNoExec('apply-cmvn-sliding')
    def test_sliding_window_cmn(self):
        """sliding_window_cmn should be numerically compatible with apply-cmvn-sliding"""
        opts = {
            'cmn_window': 600,
            'min_cmn_window': 100,
            'center': False,
            'norm_vars': False,
        }
        signal = torch.randn(40, 10, dtype=self.dtype, device=self.device)
        computed = F.sliding_window_cmn(signal, **opts)

        # Feed the identical tensor through Kaldi's CLI tool with the same options.
        cmd = ['apply-cmvn-sliding'] + convert_args(**opts) + ['ark:-', 'ark:-']
        reference = run_kaldi(cmd, 'ark', signal)
        self.assert_equal(computed, expected=reference)
class KaldiCPUOnly(TempDirMixin, TestBaseMixin):
    """Kaldi compatibility checks that are only exercised on CPU."""

    def assert_equal(self, output, *, expected, rtol=None, atol=None):
        """Assert ``output`` matches ``expected`` after casting the reference
        to this test's dtype and device."""
        reference = expected.to(dtype=self.dtype, device=self.device)
        self.assertEqual(output, reference, rtol=rtol, atol=atol)

    @parameterized.expand(load_params('kaldi_test_pitch_args.jsonl'))
    @skipIfNoExec('compute-kaldi-pitch-feats')
    def test_pitch_feats(self, kwargs):
        """compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats"""
        sample_rate = kwargs['sample_rate']

        # torchaudio side: float32 sinusoid straight into the functional API.
        waveform = get_sinusoid(dtype='float32', sample_rate=sample_rate)
        computed = F.compute_kaldi_pitch(waveform[0], **kwargs)

        # Kaldi side: the same sinusoid as an int16 WAV file, processed by the CLI tool.
        waveform = get_sinusoid(dtype='int16', sample_rate=sample_rate)
        wave_file = self.get_temp_path('test.wav')
        save_wav(wave_file, waveform, sample_rate)
        cmd = ['compute-kaldi-pitch-feats'] + convert_args(**kwargs) + ['scp:-', 'ark:-']
        reference = run_kaldi(cmd, 'scp', wave_file)
        self.assert_equal(computed, expected=reference)
test/torchaudio_unittest/functional/librosa_compatibility_cpu_test.py
0 → 100644
View file @
9dcc7a15
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.librosa_compatibility_test_impl
import
Functional
,
FunctionalComplex
class TestFunctionalCPU(Functional, PytorchTestCase):
    """Run the librosa-compatibility functional tests on CPU."""
    device = 'cpu'
class TestFunctionalComplexCPU(FunctionalComplex, PytorchTestCase):
    """Run the complex-valued librosa-compatibility tests on CPU."""
    device = 'cpu'
Prev
1
…
10
11
12
13
14
15
16
17
18
…
21
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment