Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
9dcc7a15
Commit
9dcc7a15
authored
Apr 25, 2022
by
flyingdown
Browse files
init v0.10.0
parent
db2b0b79
Pipeline
#254
failed with stages
in 0 seconds
Changes
416
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1785 additions
and
0 deletions
+1785
-0
test/torchaudio_unittest/sox_effect/smoke_test.py
test/torchaudio_unittest/sox_effect/smoke_test.py
+78
-0
test/torchaudio_unittest/sox_effect/sox_effect_test.py
test/torchaudio_unittest/sox_effect/sox_effect_test.py
+423
-0
test/torchaudio_unittest/sox_effect/torchscript_test.py
test/torchaudio_unittest/sox_effect/torchscript_test.py
+96
-0
test/torchaudio_unittest/transforms/__init__.py
test/torchaudio_unittest/transforms/__init__.py
+0
-0
test/torchaudio_unittest/transforms/autograd_cpu_test.py
test/torchaudio_unittest/transforms/autograd_cpu_test.py
+10
-0
test/torchaudio_unittest/transforms/autograd_cuda_test.py
test/torchaudio_unittest/transforms/autograd_cuda_test.py
+15
-0
test/torchaudio_unittest/transforms/autograd_test_impl.py
test/torchaudio_unittest/transforms/autograd_test_impl.py
+337
-0
test/torchaudio_unittest/transforms/batch_consistency_test.py
.../torchaudio_unittest/transforms/batch_consistency_test.py
+228
-0
test/torchaudio_unittest/transforms/kaldi_compatibility_cpu_test.py
...audio_unittest/transforms/kaldi_compatibility_cpu_test.py
+14
-0
test/torchaudio_unittest/transforms/kaldi_compatibility_cuda_test.py
...udio_unittest/transforms/kaldi_compatibility_cuda_test.py
+16
-0
test/torchaudio_unittest/transforms/kaldi_compatibility_impl.py
...orchaudio_unittest/transforms/kaldi_compatibility_impl.py
+55
-0
test/torchaudio_unittest/transforms/librosa_compatibility_cpu_test.py
...dio_unittest/transforms/librosa_compatibility_cpu_test.py
+9
-0
test/torchaudio_unittest/transforms/librosa_compatibility_cuda_test.py
...io_unittest/transforms/librosa_compatibility_cuda_test.py
+10
-0
test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py
...io_unittest/transforms/librosa_compatibility_test_impl.py
+141
-0
test/torchaudio_unittest/transforms/sox_compatibility_test.py
.../torchaudio_unittest/transforms/sox_compatibility_test.py
+88
-0
test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py
...o_unittest/transforms/torchscript_consistency_cpu_test.py
+14
-0
test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py
..._unittest/transforms/torchscript_consistency_cuda_test.py
+16
-0
test/torchaudio_unittest/transforms/torchscript_consistency_impl.py
...audio_unittest/transforms/torchscript_consistency_impl.py
+202
-0
test/torchaudio_unittest/transforms/transforms_cpu_test.py
test/torchaudio_unittest/transforms/transforms_cpu_test.py
+14
-0
test/torchaudio_unittest/transforms/transforms_cuda_test.py
test/torchaudio_unittest/transforms/transforms_cuda_test.py
+19
-0
No files found.
Too many changes to show.
To preserve performance only
416 of 416+
files are displayed.
Plain diff
Email patch
test/torchaudio_unittest/sox_effect/smoke_test.py
0 → 100644
View file @
9dcc7a15
from
torchaudio
import
sox_effects
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
TorchaudioTestCase
,
skipIfNoSox
,
get_wav_data
,
get_sinusoid
,
save_wav
,
)
from
.common
import
(
load_params
,
)
@skipIfNoSox
class SmokeTest(TempDirMixin, TorchaudioTestCase):
    """Run smoke test on various effects

    The purpose of this test suite is to verify that sox_effect functionalities do not exhibit
    abnormal behaviors.

    This test suite should be able to run without any additional tools (such as sox command),
    however without such tools, the correctness of each function cannot be verified.
    """
    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_tensor(self, args):
        """`apply_effects_tensor` should not crash"""
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        original = get_sinusoid(
            frequency=800, sample_rate=input_sr,
            n_channels=num_channels, dtype='float32')
        # The result is intentionally discarded: only a clean return is checked.
        _found, _sr = sox_effects.apply_effects_tensor(original, input_sr, effects)

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_file(self, args):
        """`apply_effects_file` should not crash when given a file path"""
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)

        input_path = self.get_temp_path('input.wav')
        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)

        # No reference output is produced here; the result is only required to exist.
        _found, _sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_fileobj(self, args):
        """`apply_effects_file` should not crash when given a file object"""
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)

        input_path = self.get_temp_path('input.wav')
        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)

        # The file stays open only for the duration of the call under test.
        with open(input_path, 'rb') as fileobj:
            _found, _sr = sox_effects.apply_effects_file(
                fileobj, effects, normalize=False, channels_first=channels_first)
test/torchaudio_unittest/sox_effect/sox_effect_test.py
0 → 100644
View file @
9dcc7a15
import
io
import
itertools
from
pathlib
import
Path
import
tarfile
from
parameterized
import
parameterized
from
torchaudio
import
sox_effects
from
torchaudio._internal
import
module_utils
as
_mod_utils
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
HttpServerMixin
,
PytorchTestCase
,
skipIfNoSox
,
skipIfNoModule
,
skipIfNoExec
,
get_asset_path
,
get_sinusoid
,
get_wav_data
,
save_wav
,
load_wav
,
sox_utils
,
)
from
.common
import
(
load_params
,
name_func
,
)
if
_mod_utils
.
is_module_available
(
"requests"
):
import
requests
@skipIfNoSox
class TestSoxEffects(PytorchTestCase):
    """Checks around the `init_sox_effects` entry point."""

    def test_init(self):
        """Calling init_sox_effects multiple times does not crash"""
        repeat_count = 3
        for _attempt in range(repeat_count):
            sox_effects.init_sox_effects()
@skipIfNoSox
class TestSoxEffectsTensor(TempDirMixin, PytorchTestCase):
    """Test suite for `apply_effects_tensor` function"""

    @parameterized.expand(
        list(itertools.product(
            ['float32', 'int32', 'int16', 'uint8'],
            [8000, 16000],
            [1, 2, 4, 8],
            [True, False],
        )),
        name_func=name_func,
    )
    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
        """With an empty effect list, `apply_effects_tensor` returns data equal to its input"""
        original = get_wav_data(dtype, num_channels, channels_first=channels_first)
        expected = original.clone()
        no_effects = []
        found, output_sample_rate = sox_effects.apply_effects_tensor(
            expected, sample_rate, no_effects, channels_first)

        assert output_sample_rate == sample_rate
        # The tensor handed to the function must be left unmodified
        self.assertEqual(original, expected)
        # The result must be a different Tensor object than the argument
        assert expected is not found
        # ... while holding the same values
        self.assertEqual(expected, found)

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects(self, args):
        """`apply_effects_tensor` output matches the file produced by `sox_utils.run_sox_effect`"""
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        output_sr = args.get("output_sample_rate")

        input_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')

        # Build the input once; it is both saved for sox and fed to the function
        original = get_sinusoid(
            frequency=800,
            sample_rate=input_sr,
            n_channels=num_channels,
            dtype='float32',
        )
        save_wav(input_path, original, input_sr)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_tensor(original, input_sr, effects)

        assert sr == expected_sr
        self.assertEqual(expected, found)
@skipIfNoSox
class TestSoxEffectsFile(TempDirMixin, PytorchTestCase):
    """Test suite for `apply_effects_file` function"""

    @parameterized.expand(
        list(itertools.product(
            ['float32', 'int32', 'int16', 'uint8'],
            [8000, 16000],
            [1, 2, 4, 8],
            [False, True],
        )),
        name_func=name_func,
    )
    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
        """With an empty effect list, `apply_effects_file` returns the data stored in the file"""
        path = self.get_temp_path('input.wav')
        expected = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(path, expected, sample_rate, channels_first=channels_first)

        no_effects = []
        found, output_sample_rate = sox_effects.apply_effects_file(
            path, no_effects, normalize=False, channels_first=channels_first)

        assert output_sample_rate == sample_rate
        self.assertEqual(expected, found)

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_str(self, args):
        """`apply_effects_file` given a `str` path matches the output of `sox_utils.run_sox_effect`"""
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        output_sr = args.get("output_sample_rate")

        input_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')

        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)

    def test_apply_effects_path(self):
        """`apply_effects_file` given a `Path` object matches the output of `sox_utils.run_sox_effect`"""
        dtype = 'int32'
        channels_first = True
        effects = [["hilbert"]]
        num_channels = 2
        input_sr = 8000
        output_sr = 8000

        input_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')

        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)

        expected, expected_sr = load_wav(reference_path)
        # The only difference from the `str` variant: the path is wrapped in `Path`
        path_object = Path(input_path)
        found, sr = sox_effects.apply_effects_file(
            path_object, effects, normalize=False, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
@skipIfNoSox
class TestFileFormats(TempDirMixin, PytorchTestCase):
    """`apply_effects_file` gives the same result as sox on various file formats"""

    @parameterized.expand(
        list(itertools.product(
            ['float32', 'int32', 'int16', 'uint8'],
            [8000, 16000],
            [1, 2],
        )),
        name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`apply_effects_file` works on various wav format"""
        channels_first = True
        effects = [['band', '300', '10']]

        input_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')
        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, sample_rate, channels_first=channels_first)
        sox_utils.run_sox_effect(input_path, reference_path, effects)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand(
        list(itertools.product(
            [8000, 16000],
            [1, 2],
        )),
        name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    )
    def test_mp3(self, sample_rate, num_channels):
        """`apply_effects_file` works on various mp3 format"""
        channels_first = True
        effects = [['band', '300', '10']]

        input_path = self.get_temp_path('input.mp3')
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
        sox_utils.run_sox_effect(input_path, reference_path, effects)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
            input_path, effects, channels_first=channels_first)
        # The result is also written to the temp dir next to the reference
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        # mp3 is compared with explicit tolerances rather than the defaults
        self.assertEqual(found, expected, atol=1e-4, rtol=1e-8)

    @parameterized.expand(
        list(itertools.product(
            [8000, 16000],
            [1, 2],
        )),
        name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    )
    def test_flac(self, sample_rate, num_channels):
        """`apply_effects_file` works on various flac format"""
        channels_first = True
        effects = [['band', '300', '10']]

        input_path = self.get_temp_path('input.flac')
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
            input_path, effects, channels_first=channels_first)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand(
        list(itertools.product(
            [8000, 16000],
            [1, 2],
        )),
        name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    )
    def test_vorbis(self, sample_rate, num_channels):
        """`apply_effects_file` works on various vorbis format"""
        channels_first = True
        effects = [['band', '300', '10']]

        input_path = self.get_temp_path('input.vorbis')
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)

        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
            input_path, effects, channels_first=channels_first)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
@skipIfNoSox
class TestApplyEffectFileWithoutExtension(PytorchTestCase):
    """`apply_effects_file` on an asset whose file name carries no extension."""

    def test_mp3(self):
        """Providing format allows to read mp3 without extension

        libsox does not check header for mp3

        https://github.com/pytorch/audio/issues/1040

        The file was generated with the following command
            ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
        """
        band_effect = [['band', '300', '10']]
        asset = get_asset_path("mp3_without_ext")
        # Only the reported sample rate is checked; the waveform is discarded
        _, sr = sox_effects.apply_effects_file(asset, band_effect, format="mp3")
        assert sr == 16000
@skipIfNoExec('sox')
@skipIfNoSox
class TestFileObject(TempDirMixin, PytorchTestCase):
    """`apply_effects_file` fed with file-like objects instead of paths."""

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_fileobj(self, ext, compression):
        """Applying effects via file object works"""
        sample_rate = 16000
        channels_first = True
        effects = [['band', '300', '10']]
        # The format is passed explicitly only for mp3
        format_ = ext if ext in ['mp3'] else None

        input_path = self.get_temp_path(f'input.{ext}')
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        with open(input_path, 'rb') as fileobj:
            found, sr = sox_effects.apply_effects_file(
                fileobj, effects, channels_first=channels_first, format=format_)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_bytesio(self, ext, compression):
        """Applying effects via BytesIO object works"""
        sample_rate = 16000
        channels_first = True
        effects = [['band', '300', '10']]
        format_ = ext if ext in ['mp3'] else None

        input_path = self.get_temp_path(f'input.{ext}')
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        # Copy the whole file into memory, then close it before applying effects
        with open(input_path, 'rb') as file_:
            fileobj = io.BytesIO(file_.read())
        found, sr = sox_effects.apply_effects_file(
            fileobj, effects, channels_first=channels_first, format=format_)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_tarfile(self, ext, compression):
        """Applying effects to compressed audio via file-like file works"""
        sample_rate = 16000
        channels_first = True
        effects = [['band', '300', '10']]
        format_ = ext if ext in ['mp3'] else None
        audio_file = f'input.{ext}'

        input_path = self.get_temp_path(audio_file)
        reference_path = self.get_temp_path('reference.wav')
        archive_path = self.get_temp_path('archive.tar.gz')
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        # Pack the audio into an archive, then read it back through the member file object
        with tarfile.TarFile(archive_path, 'w') as tarobj:
            tarobj.add(input_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, 'r') as tarobj:
            fileobj = tarobj.extractfile(audio_file)
            found, sr = sox_effects.apply_effects_file(
                fileobj, effects, channels_first=channels_first, format=format_)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
@skipIfNoSox
@skipIfNoExec('sox')
@skipIfNoModule("requests")
class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
    """`apply_effects_file` fed with the raw stream of a `requests` response."""

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_requests(self, ext, compression):
        """Applying effects to `resp.raw` of a streamed GET matches the sox reference"""
        sample_rate = 16000
        channels_first = True
        effects = [['band', '300', '10']]
        # The format is passed explicitly only for mp3
        format_ = ext if ext in ['mp3'] else None
        audio_file = f'input.{ext}'

        input_path = self.get_temp_path(audio_file)
        reference_path = self.get_temp_path('reference.wav')
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        url = self.get_url(audio_file)
        with requests.get(url, stream=True) as resp:
            found, sr = sox_effects.apply_effects_file(
                resp.raw, effects, channels_first=channels_first, format=format_)
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
test/torchaudio_unittest/sox_effect/torchscript_test.py
0 → 100644
View file @
9dcc7a15
from
typing
import
List
import
torch
from
torchaudio
import
sox_effects
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
TorchaudioTestCase
,
skipIfNoSox
,
get_sinusoid
,
save_wav
,
torch_script
,
)
from
.common
import
(
load_params
,
)
class SoxEffectTensorTransform(torch.nn.Module):
    """Module that forwards a tensor to `sox_effects.apply_effects_tensor`.

    Args:
        effects: effect chain, stored as given and passed on each call
        sample_rate: sample rate passed alongside the tensor
        channels_first: passed as the fourth positional argument of the call
    """
    effects: List[List[str]]

    def __init__(self, effects: List[List[str]], sample_rate: int, channels_first: bool):
        super().__init__()
        self.effects = effects
        self.sample_rate = sample_rate
        self.channels_first = channels_first

    def forward(self, tensor: torch.Tensor):
        """Return whatever `apply_effects_tensor` returns for ``tensor`` and the stored settings."""
        return sox_effects.apply_effects_tensor(
            tensor,
            self.sample_rate,
            self.effects,
            self.channels_first,
        )
class SoxEffectFileTransform(torch.nn.Module):
    """Module that forwards a file path to `sox_effects.apply_effects_file`.

    Args:
        effects: effect chain, stored as given and passed on each call
        channels_first: forwarded as the ``channels_first`` keyword of the call
    """
    effects: List[List[str]]
    channels_first: bool

    def __init__(self, effects: List[List[str]], channels_first: bool):
        super().__init__()
        self.effects = effects
        self.channels_first = channels_first

    def forward(self, path: str):
        """Return whatever `apply_effects_file` returns for ``path`` and the stored settings."""
        # `channels_first` must go by keyword: in the torchaudio signature
        # `apply_effects_file(path, effects, normalize=True, channels_first=True, format=None)`
        # the third positional slot is `normalize`, so a positional pass would set the wrong option.
        return sox_effects.apply_effects_file(
            path, self.effects, channels_first=self.channels_first)
@skipIfNoSox
class TestTorchScript(TempDirMixin, TorchaudioTestCase):
    """Scripted sox-effect modules must agree with direct calls to `sox_effects`."""

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_tensor(self, args):
        """Scripted `SoxEffectTensorTransform` matches a direct `apply_effects_tensor` call"""
        effects = args['effects']
        channels_first = True
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)

        trans = SoxEffectTensorTransform(effects, input_sr, channels_first)
        trans = torch_script(trans)

        wav = get_sinusoid(
            frequency=800, sample_rate=input_sr,
            n_channels=num_channels, dtype='float32', channels_first=channels_first)
        found, sr_found = trans(wav)
        expected, sr_expected = sox_effects.apply_effects_tensor(
            wav, input_sr, effects, channels_first)

        assert sr_found == sr_expected
        self.assertEqual(expected, found)

    @parameterized.expand(
        load_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_file(self, args):
        """Scripted `SoxEffectFileTransform` matches a direct `apply_effects_file` call"""
        effects = args['effects']
        channels_first = True
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)

        trans = SoxEffectFileTransform(effects, channels_first)
        trans = torch_script(trans)

        path = self.get_temp_path('input.wav')
        wav = get_sinusoid(
            frequency=800, sample_rate=input_sr,
            n_channels=num_channels, dtype='float32', channels_first=channels_first)
        save_wav(path, wav, sample_rate=input_sr, channels_first=channels_first)

        found, sr_found = trans(path)
        # `channels_first` goes by keyword: the third positional parameter of
        # `apply_effects_file` is `normalize` in the torchaudio signature.
        expected, sr_expected = sox_effects.apply_effects_file(
            path, effects, channels_first=channels_first)

        assert sr_found == sr_expected
        self.assertEqual(expected, found)
test/torchaudio_unittest/transforms/__init__.py
0 → 100644
View file @
9dcc7a15
test/torchaudio_unittest/transforms/autograd_cpu_test.py
0 → 100644
View file @
9dcc7a15
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.autograd_test_impl
import
AutogradTestMixin
,
AutogradTestFloat32
class AutogradCPUTest(AutogradTestMixin, PytorchTestCase):
    """Binds the `AutogradTestMixin` tests to ``device = 'cpu'``."""
    device = 'cpu'
class AutogradRNNTCPUTest(AutogradTestFloat32, PytorchTestCase):
    """Binds the `AutogradTestFloat32` tests to ``device = 'cpu'``."""
    device = 'cpu'
test/torchaudio_unittest/transforms/autograd_cuda_test.py
0 → 100644
View file @
9dcc7a15
from
torchaudio_unittest.common_utils
import
(
PytorchTestCase
,
skipIfNoCuda
,
)
from
.autograd_test_impl
import
AutogradTestMixin
,
AutogradTestFloat32
@skipIfNoCuda
class AutogradCUDATest(AutogradTestMixin, PytorchTestCase):
    """Binds the `AutogradTestMixin` tests to ``device = 'cuda'``."""
    device = 'cuda'
@skipIfNoCuda
class AutogradRNNTCUDATest(AutogradTestFloat32, PytorchTestCase):
    """Binds the `AutogradTestFloat32` tests to ``device = 'cuda'``."""
    device = 'cuda'
test/torchaudio_unittest/transforms/autograd_test_impl.py
0 → 100644
View file @
9dcc7a15
from
typing
import
List
import
unittest
from
parameterized
import
parameterized
import
torch
from
torch.autograd
import
gradcheck
,
gradgradcheck
import
torchaudio.transforms
as
T
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
get_whitenoise
,
get_spectrogram
,
nested_params
,
rnnt_utils
,
)
class
_DeterministicWrapper
(
torch
.
nn
.
Module
):
"""Helper transform wrapper to make the given transform deterministic"""
def
__init__
(
self
,
transform
,
seed
=
0
):
super
().
__init__
()
self
.
seed
=
seed
self
.
transform
=
transform
def
forward
(
self
,
input
:
torch
.
Tensor
):
torch
.
random
.
manual_seed
(
self
.
seed
)
return
self
.
transform
(
input
)
class
AutogradTestMixin
(
TestBaseMixin
):
def assert_grad(
        self,
        transform: torch.nn.Module,
        inputs: List[torch.Tensor],
        *,
        nondet_tol: float = 0.0,
):
    """Run `gradcheck` and `gradgradcheck` on ``transform`` with ``inputs``.

    The transform is moved to float64 on ``self.device``. Tensor inputs are converted to
    double (or cdouble when complex) on the same device and marked as requiring grad;
    non-tensor inputs are passed through unchanged.

    Args:
        transform: module under test
        inputs: positional arguments for the transform
        nondet_tol: tolerance forwarded to `gradgradcheck` only
    """
    transform = transform.to(dtype=torch.float64, device=self.device)

    # gradcheck and gradgradcheck only pass if the input tensors are of dtype `torch.double` or
    # `torch.cdouble`, when the default eps and tolerance values are used.
    prepared = []
    for item in inputs:
        if not torch.is_tensor(item):
            prepared.append(item)
            continue
        target_dtype = torch.cdouble if item.is_complex() else torch.double
        item = item.to(dtype=target_dtype, device=self.device)
        item.requires_grad = True
        prepared.append(item)

    assert gradcheck(transform, prepared)
    assert gradgradcheck(transform, prepared, nondet_tol=nondet_tol)
@parameterized.expand(
    # Same 16 cases, in the same order, as an explicit table would list them:
    # first the `return_complex=True` variants, then power None / 1.0 / 2.0.
    [
        ({'pad': pad, 'normalized': normalized, 'power': None, 'return_complex': True}, )
        for normalized in (False, True)
        for pad in (0, 3)
    ] + [
        ({'pad': pad, 'normalized': normalized, 'power': power}, )
        for power in (None, 1.0, 2.0)
        for normalized in (False, True)
        for pad in (0, 3)
    ]
)
def test_spectrogram(self, kwargs):
    """Gradient check for `T.Spectrogram` built from ``kwargs``."""
    # replication_pad1d_backward_cuda is not deterministic and
    # gives very small (~2.7756e-17) difference.
    #
    # See https://github.com/pytorch/pytorch/issues/54093
    waveform = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    spectrogram_transform = T.Spectrogram(**kwargs)
    self.assert_grad(spectrogram_transform, [waveform], nondet_tol=1e-10)
@parameterized.expand([(False, ), (True, )])
def test_inverse_spectrogram(self, return_complex):
    """Gradient check for `T.InverseSpectrogram` on complex input or its real view."""
    n_fft = 400
    # create a realistic input:
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    length = noise.shape[-1]
    spectrogram = get_spectrogram(noise, n_fft=n_fft)
    if not return_complex:
        spectrogram = torch.view_as_real(spectrogram)

    # test
    inverse = T.InverseSpectrogram(n_fft=n_fft)
    self.assert_grad(inverse, [spectrogram, length])
def test_melspectrogram(self):
    """Gradient check for `T.MelSpectrogram` on a short white-noise waveform."""
    # replication_pad1d_backward_cuda is not deterministic and
    # gives very small (~2.7756e-17) difference.
    #
    # See https://github.com/pytorch/pytorch/issues/54093
    sample_rate = 8000
    mel_transform = T.MelSpectrogram(sample_rate=sample_rate)
    noise = get_whitenoise(sample_rate=sample_rate, duration=0.05, n_channels=2)
    self.assert_grad(mel_transform, [noise], nondet_tol=1e-10)
@nested_params(
    [0, 0.99],
    [False, True],
)
def test_griffinlim(self, momentum, rand_init):
    """Gradient check for `T.GriffinLim`, wrapped so each call uses the same seed."""
    n_fft = 400
    power = 1
    n_iter = 3
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    spec = get_spectrogram(noise, n_fft=n_fft, power=power)
    griffin_lim = T.GriffinLim(
        n_fft=n_fft,
        n_iter=n_iter,
        momentum=momentum,
        rand_init=rand_init,
        power=power,
    )
    transform = _DeterministicWrapper(griffin_lim)
    self.assert_grad(transform, [spec])
@parameterized.expand([(False, ), (True, )])
def test_mfcc(self, log_mels):
    """Gradient check for `T.MFCC` with ``log_mels`` off and on."""
    sample_rate = 8000
    noise = get_whitenoise(sample_rate=sample_rate, duration=0.05, n_channels=2)
    mfcc = T.MFCC(sample_rate=sample_rate, log_mels=log_mels)
    self.assert_grad(mfcc, [noise])
@parameterized.expand([(False, ), (True, )])
def test_lfcc(self, log_lf):
    """Gradient check for `T.LFCC` with ``log_lf`` off and on."""
    sample_rate = 8000
    noise = get_whitenoise(sample_rate=sample_rate, duration=0.05, n_channels=2)
    lfcc = T.LFCC(sample_rate=sample_rate, log_lf=log_lf)
    self.assert_grad(lfcc, [noise])
def test_compute_deltas(self):
    """Gradient check for `T.ComputeDeltas` on a random 10x20 tensor."""
    deltas = T.ComputeDeltas()
    spec = torch.rand(10, 20)
    self.assert_grad(deltas, [spec])
@parameterized.expand([(8000, 8000), (8000, 4000), (4000, 8000)])
def test_resample(self, orig_freq, new_freq):
    """Gradient check for `T.Resample` with equal, lower and higher target rates."""
    resampler = T.Resample(orig_freq=orig_freq, new_freq=new_freq)
    # The noise is always generated at 8000, independent of `orig_freq`
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    self.assert_grad(resampler, [noise])
@parameterized.expand([
    ("linear", ),
    ("exponential", ),
    ("logarithmic", ),
    ("quarter_sine", ),
    ("half_sine", ),
])
def test_fade(self, fade_shape):
    """Gradient check for `T.Fade` with each fade shape."""
    fader = T.Fade(fade_shape=fade_shape)
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    self.assert_grad(fader, [noise], nondet_tol=1e-10)
@parameterized.expand([(T.TimeMasking, ), (T.FrequencyMasking, )])
def test_masking(self, masking_transform):
    """Gradient check for a masking transform, wrapped so each call uses the same seed."""
    sample_rate = 8000
    n_fft = 400
    noise = get_whitenoise(sample_rate=sample_rate, duration=0.05, n_channels=2)
    spectrogram = get_spectrogram(noise, n_fft=n_fft, power=1)
    masker = masking_transform(400)
    deterministic_transform = _DeterministicWrapper(masker)
    self.assert_grad(deterministic_transform, [spectrogram])
@parameterized.expand([(T.TimeMasking,), (T.FrequencyMasking,)])
def test_masking_iid(self, masking_transform):
    """Run ``assert_grad`` on a masking transform applied to a batch of three spectrograms."""
    rate, fft_size = 8000, 400

    # One spectrogram per noise seed, stacked afterwards along a new leading dimension.
    per_seed = []
    for seed in range(3):
        noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2, seed=seed)
        per_seed.append(get_spectrogram(noise, n_fft=fft_size, power=1))
    stacked = torch.stack(per_seed)
    assert stacked.ndim == 4

    # NOTE(review): the second positional argument is presumably ``iid_masks`` - confirm.
    wrapped = _DeterministicWrapper(masking_transform(400, True))
    self.assert_grad(wrapped, [stacked])
def test_spectral_centroid(self):
    """Run ``assert_grad`` on ``T.SpectralCentroid`` with a small non-determinism tolerance."""
    rate = 8000
    centroid = T.SpectralCentroid(sample_rate=rate)
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(centroid, [noise], nondet_tol=1e-10)
def test_amplitude_to_db(self):
    """Run ``assert_grad`` on ``T.AmplitudeToDB`` using white noise as input."""
    rate = 8000
    to_db = T.AmplitudeToDB()
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(to_db, [noise])
def test_melscale(self):
    """Run ``assert_grad`` on ``T.MelScale`` applied to a magnitude spectrogram."""
    rate, fft_size = 8000, 400
    mel_bins = fft_size // 2 + 1
    mel_scale = T.MelScale(sample_rate=rate, n_mels=mel_bins)
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    magnitude = get_spectrogram(noise, n_fft=fft_size, power=1)
    self.assert_grad(mel_scale, [magnitude])
@parameterized.expand([
    (1.5, "amplitude"),
    (2, "power"),
    (10, "db"),
])
def test_vol(self, gain, gain_type):
    """Run ``assert_grad`` on ``T.Vol`` for each gain type."""
    rate = 8000
    volume = T.Vol(gain=gain, gain_type=gain_type)
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(volume, [noise])
@parameterized.expand([
    # Case order: (center, norm_vars) = (F, F), (T, F), (F, T), (T, T).
    ({'cmn_window': 100, 'min_cmn_window': 50, 'center': center, 'norm_vars': norm_vars},)
    for norm_vars in (False, True)
    for center in (False, True)
])
def test_sliding_window_cmn(self, kwargs):
    """Run ``assert_grad`` on ``T.SlidingWindowCmn`` built from ``kwargs``."""
    fft_size = 10
    exponent = 1
    noise = get_whitenoise(sample_rate=200, duration=0.05, n_channels=2)
    spec = get_spectrogram(noise, n_fft=fft_size, power=exponent)
    # Swap the last two dimensions before handing the spectrogram to the transform.
    swapped = spec.transpose(-1, -2)
    cmn = T.SlidingWindowCmn(**kwargs)
    self.assert_grad(cmn, [swapped])
@unittest.expectedFailure
def test_timestretch_zeros_fail(self):
    """Test that ``T.TimeStretch`` fails gradcheck at 0

    This is because ``F.phase_vocoder`` converts data from cartesian to polar coordinate,
    which performs ``atan2(img, real)``, and gradient is not defined at 0.
    """
    fft_size = 16
    stretch = T.TimeStretch(n_freq=fft_size // 2 + 1, fixed_rate=0.99)
    # An all-zero waveform gives the spectrogram the test expects to fail on.
    silence = torch.zeros(2, 40)
    spec = get_spectrogram(silence, n_fft=fft_size, power=None)
    self.assert_grad(stretch, [spec])
@nested_params(
    [0.7, 0.8, 0.9, 1.0, 1.3],
    [False, True],
)
def test_timestretch_non_zero(self, rate, test_pseudo_complex):
    """Verify that ``T.TimeStretch`` does not fail if it's not close to 0

    ``T.TimeStretch`` is not differentiable around 0, so this test checks the
    differentiability for cases where input is not zero.

    As tested above, when spectrogram contains values close to zero, the gradients
    are unstable and gradcheck fails.

    In this test, we generate spectrogram from random signal, then we push the points
    around zero away from the origin.

    This process does not reflect the real use-case, and it is not practical for users,
    but this helps us understand to what degree the function is differentiable and
    when not.
    """
    fft_size = 16
    stretch = T.TimeStretch(n_freq=fft_size // 2 + 1, fixed_rate=rate)
    noise = get_whitenoise(sample_rate=40, duration=1, n_channels=2)
    spec = get_spectrogram(noise, n_fft=fft_size, power=None)

    # 1e-3 is too small (on CPU)
    epsilon = 1e-2
    near_origin = spec.abs() < epsilon
    # Rescale every too-small element to magnitude ``epsilon``, keeping its direction.
    small_values = spec[near_origin]
    spec[near_origin] = epsilon * small_values / small_values.abs()

    if test_pseudo_complex:
        spec = torch.view_as_real(spec)
    self.assert_grad(stretch, [spec])
def test_psd(self):
    """Run ``assert_grad`` on ``T.PSD`` without a mask."""
    psd = T.PSD()
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    spec = get_spectrogram(noise, n_fft=400)
    self.assert_grad(psd, [spec])
@parameterized.expand([
    [True],
    [False],
])
def test_psd_with_mask(self, multi_mask):
    """Run ``assert_grad`` on ``T.PSD`` together with a random mask."""
    psd = T.PSD(multi_mask=multi_mask)
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    spec = get_spectrogram(noise, n_fft=400)
    # A multi-mask covers the last three dimensions, a single mask only the last two.
    trailing_dims = 3 if multi_mask else 2
    mask = torch.rand(spec.shape[-trailing_dims:])
    self.assert_grad(psd, [spec, mask])
@parameterized.expand([
    "ref_channel",
    # stv_power test time too long, comment for now
    # "stv_power",
    # stv_evd will fail since the eigenvalues are not distinct
    # "stv_evd",
])
def test_mvdr(self, solution):
    """Run ``assert_grad`` on ``T.MVDR`` with two independent random masks."""
    beamformer = T.MVDR(solution=solution)
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    spec = get_spectrogram(noise, n_fft=400)
    mask_shape = spec.shape[-2:]
    # Two separate draws: the first is passed as ``mask_s``, the second as ``mask_n``.
    mask_s = torch.rand(mask_shape)
    mask_n = torch.rand(mask_shape)
    self.assert_grad(beamformer, [spec, mask_s, mask_n])
class AutogradTestFloat32(TestBaseMixin):
    """Autograd checks that are run with float32 inputs."""

    def assert_grad(
            self,
            transform: torch.nn.Module,
            inputs: List[torch.Tensor],
    ):
        """Run ``gradcheck`` on ``transform`` with float32 inputs on ``self.device``.

        Args:
            transform: Module under test.
            inputs: Positional arguments for ``transform``. Floating-point tensors are
                cast to float32; all tensors are moved to ``self.device``; non-tensor
                values are passed through unchanged.
        """
        inputs_ = []
        for i in inputs:
            if torch.is_tensor(i):
                if i.is_floating_point():
                    i = i.to(dtype=torch.float32, device=self.device)
                else:
                    # Keep the dtype of non-floating tensors (e.g. the integer targets
                    # and lengths used by ``test_rnnt_loss``); only move them.
                    i = i.to(device=self.device)
            inputs_.append(i)
        # gradcheck with float32 requires higher atol and epsilon.
        # The converted inputs are what must be checked; the original code built
        # ``inputs_`` but then passed the unconverted ``inputs``.
        assert gradcheck(transform, inputs_, eps=1e-3, atol=1e-3, nondet_tol=0.)

    @parameterized.expand([
        (rnnt_utils.get_B1_T10_U3_D4_data, ),
        (rnnt_utils.get_B2_T4_U3_D3_data, ),
        (rnnt_utils.get_B1_T2_U3_D5_data, ),
    ])
    def test_rnnt_loss(self, data_func):
        """Run ``assert_grad`` on ``T.RNNTLoss`` using data from ``data_func``."""
        data = data_func()
        # Some data functions return a tuple; the data dict is its first element.
        if isinstance(data, tuple):
            data = data[0]

        inputs = (
            data["logits"].to(torch.float32),
            data["targets"],
            data["logit_lengths"],
            data["target_lengths"],
        )
        loss = T.RNNTLoss(blank=data["blank"])

        self.assert_grad(loss, inputs)
test/torchaudio_unittest/transforms/batch_consistency_test.py
0 → 100644
View file @
9dcc7a15
"""Test numerical consistency among single input and batched input."""
import
torch
from
parameterized
import
parameterized
from
torchaudio
import
transforms
as
T
from
torchaudio_unittest
import
common_utils
class TestTransforms(common_utils.TorchaudioTestCase):
    """Checks that transforms give matching results for batched and per-item inputs."""
    backend = 'default'

    def assert_batch_consistency(
            self, transform, batch, *args, atol=1e-8, rtol=1e-5, seed=42, **kwargs):
        """Assert that ``transform`` gives the same result per item and per batch.

        Args:
            transform: Callable applied to each slice ``batch[i]`` and to the whole batch.
            batch: Input tensor; dimension 0 is iterated for the per-item pass.
            *args: Extra positional arguments forwarded to ``transform``.
            atol: Absolute tolerance for both comparisons.
            rtol: Relative tolerance for both comparisons.
            seed: Seed set before each of the two passes.
            **kwargs: Extra keyword arguments forwarded to ``transform``.
        """
        item_count = batch.size(0)

        # Per-item pass: seed, then transform every slice along dim 0 in order.
        torch.random.manual_seed(seed)
        items_input = batch.clone()
        per_item = []
        for index in range(item_count):
            per_item.append(transform(items_input[index], *args, **kwargs))
        items_result = torch.stack(per_item)

        # Batched pass: re-seed so it starts from the same random state.
        torch.random.manual_seed(seed)
        batch_input = batch.clone()
        batch_result = transform(batch_input, *args, **kwargs)

        # The two input clones are compared too, not only the outputs.
        self.assertEqual(items_input, batch_input, rtol=rtol, atol=atol)
        self.assertEqual(items_result, batch_result, rtol=rtol, atol=atol)

    def test_batch_AmplitudeToDB(self):
        """``T.AmplitudeToDB`` is batch consistent."""
        spec = torch.rand((3, 2, 6, 201))
        self.assert_batch_consistency(T.AmplitudeToDB(), spec)

    def test_batch_Resample(self):
        """``T.Resample`` with default arguments is batch consistent."""
        waveform = torch.randn(3, 2, 2786)
        self.assert_batch_consistency(T.Resample(), waveform)

    def test_batch_MelScale(self):
        """``T.MelScale`` with default arguments is batch consistent."""
        specgram = torch.randn(3, 2, 201, 256)
        self.assert_batch_consistency(T.MelScale(), specgram)

    def test_batch_InverseMelScale(self):
        """``T.InverseMelScale`` is batch consistent within a loose tolerance."""
        n_mels = 32
        n_stft = 5
        mel_spec = torch.randn(3, 2, n_mels, 32) ** 2
        inverse = T.InverseMelScale(n_stft, n_mels)

        # InverseMelScale runs SGD on randomly initialized values, so the two passes
        # do not yield exactly the same result; hence the very relaxed tolerance.
        self.assert_batch_consistency(inverse, mel_spec, atol=1.0, rtol=1e-5)

    def test_batch_compute_deltas(self):
        """``T.ComputeDeltas`` is batch consistent."""
        specgram = torch.randn(3, 2, 31, 2786)
        self.assert_batch_consistency(T.ComputeDeltas(), specgram)

    def test_batch_mulaw(self):
        """Mu-law encoding and decoding give the same result per item and per batch."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)

        # Encode each item separately, then stack.
        expected = torch.stack([T.MuLawEncoding()(waveform[i]) for i in range(3)])
        # Encode the whole batch at once.
        computed = T.MuLawEncoding()(waveform)
        self.assertEqual(computed, expected)

        # Decode each encoded item separately, then stack.
        expected_decoded = torch.stack([T.MuLawDecoding()(expected[i]) for i in range(3)])
        # Decode the whole encoded batch at once.
        computed_decoded = T.MuLawDecoding()(computed)
        self.assertEqual(computed_decoded, expected_decoded)

    def test_batch_spectrogram(self):
        """``T.Spectrogram`` is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.Spectrogram(), waveform)

    def test_batch_inverse_spectrogram(self):
        """``T.InverseSpectrogram`` is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        specgram = common_utils.get_spectrogram(noise, n_fft=400)
        # Split the six channels into a batch of three two-channel spectrograms.
        specgram = specgram.reshape(3, 2, *specgram.shape[-2:])
        self.assert_batch_consistency(T.InverseSpectrogram(n_fft=400), specgram)

    def test_batch_melspectrogram(self):
        """``T.MelSpectrogram`` is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.MelSpectrogram(), waveform)

    def test_batch_mfcc(self):
        """``T.MFCC`` is batch consistent within ``atol=1e-4``."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.MFCC(), waveform, atol=1e-4, rtol=1e-5)

    def test_batch_lfcc(self):
        """``T.LFCC`` is batch consistent within ``atol=1e-4``."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.LFCC(), waveform, atol=1e-4, rtol=1e-5)

    @parameterized.expand([(True, ), (False, )])
    def test_batch_TimeStretch(self, test_pseudo_complex):
        """``T.TimeStretch`` is batch consistent for complex and real-view input."""
        rate = 2
        num_freq = 1025
        num_frames = 400
        batch_size = 3

        spec = torch.randn(batch_size, num_freq, num_frames, dtype=torch.complex64)
        if test_pseudo_complex:
            spec = torch.view_as_real(spec)

        stretch = T.TimeStretch(fixed_rate=rate, n_freq=num_freq, hop_length=512)
        self.assert_batch_consistency(stretch, spec, atol=1e-5, rtol=1e-5)

    def test_batch_Fade(self):
        """``T.Fade`` is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        fade_in_len = 3000
        fade_out_len = 3000
        self.assert_batch_consistency(T.Fade(fade_in_len, fade_out_len), waveform)

    def test_batch_Vol(self):
        """``T.Vol`` is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.Vol(gain=1.1), waveform)

    def test_batch_spectral_centroid(self):
        """``T.SpectralCentroid`` is batch consistent."""
        sample_rate = 44100
        noise = common_utils.get_whitenoise(sample_rate=sample_rate, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        self.assert_batch_consistency(T.SpectralCentroid(sample_rate), waveform)

    def test_batch_pitch_shift(self):
        """``T.PitchShift`` is batch consistent."""
        sample_rate = 8000
        n_steps = -2
        noise = common_utils.get_whitenoise(
            sample_rate=sample_rate, duration=0.05, n_channels=6)
        waveform = noise.reshape(3, 2, -1)
        shifter = T.PitchShift(sample_rate, n_steps, n_fft=400)
        self.assert_batch_consistency(shifter, waveform)

    def test_batch_PSD(self):
        """``T.PSD`` without a mask is batch consistent."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        specgram = common_utils.get_spectrogram(noise, n_fft=400)
        specgram = specgram.reshape(3, 2, *specgram.shape[-2:])
        self.assert_batch_consistency(T.PSD(), specgram)

    def test_batch_PSD_with_mask(self):
        """``T.PSD`` with a per-item mask gives the same result per item and per batch."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        noise = noise.to(torch.double)
        specgram = common_utils.get_spectrogram(noise, n_fft=400)
        specgram = specgram.reshape(3, 2, *specgram.shape[-2:])
        mask = torch.rand((3,) + tuple(specgram.shape[-2:]))
        psd = T.PSD()

        # Transform each (spectrogram, mask) pair on its own, then stack.
        expected = torch.stack([psd(specgram[i], mask[i]) for i in range(3)])
        # Transform the whole batch at once.
        computed = psd(specgram, mask)

        self.assertEqual(computed, expected)

    @parameterized.expand([
        [True],
        [False],
    ])
    def test_MVDR(self, multi_mask):
        """``T.MVDR`` gives the same result per item and per batch."""
        noise = common_utils.get_whitenoise(sample_rate=8000, duration=1, n_channels=6)
        noise = noise.to(torch.double)
        specgram = common_utils.get_spectrogram(noise, n_fft=400)
        specgram = specgram.reshape(3, 2, *specgram.shape[-2:])

        # With ``multi_mask`` the masks carry an extra dimension of size 2 after the batch.
        trailing = tuple(specgram.shape[-2:])
        mask_shape = (3, 2) + trailing if multi_mask else (3,) + trailing
        mask_s = torch.rand(mask_shape)
        mask_n = torch.rand(mask_shape)
        mvdr = T.MVDR(multi_mask=multi_mask)

        # Transform each item on its own, then stack.
        expected = torch.stack(
            [mvdr(specgram[i], mask_s[i], mask_n[i]) for i in range(3)])
        # Transform the whole batch at once.
        computed = mvdr(specgram, mask_s, mask_n)

        self.assertEqual(computed, expected)
test/torchaudio_unittest/transforms/kaldi_compatibility_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest
import
common_utils
from
.kaldi_compatibility_impl
import
Kaldi
class TestKaldiFloat32(Kaldi, common_utils.PytorchTestCase):
    """Runs the ``Kaldi`` test suite with float32 tensors on the CPU."""
    device = torch.device('cpu')
    dtype = torch.float32
class TestKaldiFloat64(Kaldi, common_utils.PytorchTestCase):
    """Runs the ``Kaldi`` test suite with float64 tensors on the CPU."""
    device = torch.device('cpu')
    dtype = torch.float64
test/torchaudio_unittest/transforms/kaldi_compatibility_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest
import
common_utils
from
.kaldi_compatibility_impl
import
Kaldi
@common_utils.skipIfNoCuda
class TestKaldiFloat32(Kaldi, common_utils.PytorchTestCase):
    """Runs the ``Kaldi`` test suite with float32 tensors on a CUDA device."""
    device = torch.device('cuda')
    dtype = torch.float32
@common_utils.skipIfNoCuda
class TestKaldiFloat64(Kaldi, common_utils.PytorchTestCase):
    """Runs the ``Kaldi`` test suite with float64 tensors on a CUDA device."""
    device = torch.device('cuda')
    dtype = torch.float64
test/torchaudio_unittest/transforms/kaldi_compatibility_impl.py
0 → 100644
View file @
9dcc7a15
"""Test suites for checking numerical compatibility against Kaldi"""
import
torchaudio.compliance.kaldi
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
TempDirMixin
,
load_params
,
skipIfNoExec
,
get_asset_path
,
load_wav
,
)
from
torchaudio_unittest.common_utils.kaldi_utils
import
(
convert_args
,
run_kaldi
,
)
class Kaldi(TempDirMixin, TestBaseMixin):
    """Compares ``torchaudio.compliance.kaldi`` features against Kaldi executables."""

    def assert_equal(self, output, *, expected, rtol=None, atol=None):
        """Compare ``output`` with ``expected`` after casting ``expected``.

        ``expected`` is converted to ``self.dtype`` on ``self.device`` first.
        """
        reference = expected.to(dtype=self.dtype, device=self.device)
        self.assertEqual(output, reference, rtol=rtol, atol=atol)

    @parameterized.expand(load_params('kaldi_test_fbank_args.jsonl'))
    @skipIfNoExec('compute-fbank-feats')
    def test_fbank(self, kwargs):
        """fbank should be numerically compatible with compute-fbank-feats"""
        wave_file = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wave_file, normalize=False)[0]
        waveform = loaded.to(dtype=self.dtype, device=self.device)
        result = torchaudio.compliance.kaldi.fbank(waveform, **kwargs)

        # Same options on the command line as were passed to the Python function.
        command = ['compute-fbank-feats', *convert_args(**kwargs), 'scp:-', 'ark:-']
        kaldi_result = run_kaldi(command, 'scp', wave_file)
        self.assert_equal(result, expected=kaldi_result, rtol=1e-4, atol=1e-8)

    @parameterized.expand(load_params('kaldi_test_spectrogram_args.jsonl'))
    @skipIfNoExec('compute-spectrogram-feats')
    def test_spectrogram(self, kwargs):
        """spectrogram should be numerically compatible with compute-spectrogram-feats"""
        wave_file = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wave_file, normalize=False)[0]
        waveform = loaded.to(dtype=self.dtype, device=self.device)
        result = torchaudio.compliance.kaldi.spectrogram(waveform, **kwargs)

        # Same options on the command line as were passed to the Python function.
        command = ['compute-spectrogram-feats', *convert_args(**kwargs), 'scp:-', 'ark:-']
        kaldi_result = run_kaldi(command, 'scp', wave_file)
        self.assert_equal(result, expected=kaldi_result, rtol=1e-4, atol=1e-8)

    @parameterized.expand(load_params('kaldi_test_mfcc_args.jsonl'))
    @skipIfNoExec('compute-mfcc-feats')
    def test_mfcc(self, kwargs):
        """mfcc should be numerically compatible with compute-mfcc-feats"""
        wave_file = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wave_file, normalize=False)[0]
        waveform = loaded.to(dtype=self.dtype, device=self.device)
        result = torchaudio.compliance.kaldi.mfcc(waveform, **kwargs)

        # Same options on the command line as were passed to the Python function.
        command = ['compute-mfcc-feats', *convert_args(**kwargs), 'scp:-', 'ark:-']
        kaldi_result = run_kaldi(command, 'scp', wave_file)
        self.assert_equal(result, expected=kaldi_result, rtol=1e-4, atol=1e-8)
test/torchaudio_unittest/transforms/librosa_compatibility_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.librosa_compatibility_test_impl
import
TransformsTestBase
class TestTransforms(TransformsTestBase, PytorchTestCase):
    """Runs ``TransformsTestBase`` with float64 tensors on the CPU."""
    device = torch.device('cpu')
    dtype = torch.float64
test/torchaudio_unittest/transforms/librosa_compatibility_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
.librosa_compatibility_test_impl
import
TransformsTestBase
@skipIfNoCuda
class TestTransforms(TransformsTestBase, PytorchTestCase):
    """Runs ``TransformsTestBase`` with float64 tensors on a CUDA device."""
    device = torch.device('cuda')
    dtype = torch.float64
test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py
0 → 100644
View file @
9dcc7a15
import
unittest
import
torch
import
torchaudio.transforms
as
T
from
torchaudio._internal.module_utils
import
is_module_available
from
parameterized
import
param
,
parameterized
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
get_whitenoise
,
get_sinusoid
,
get_spectrogram
,
nested_params
,
)
# Result of the librosa availability check; also used by the skip decorator below.
LIBROSA_AVAILABLE = is_module_available('librosa')

# Import librosa only when the check succeeded, so this module still imports without it.
if LIBROSA_AVAILABLE:
    import librosa
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class TransformsTestBase(TestBaseMixin):
    """Compares torchaudio transforms against the corresponding librosa functions."""

    @parameterized.expand([
        param(n_fft=400, hop_length=200, power=2.0),
        param(n_fft=600, hop_length=100, power=2.0),
        param(n_fft=400, hop_length=200, power=3.0),
        param(n_fft=200, hop_length=50, power=2.0),
    ])
    def test_Spectrogram(self, n_fft, hop_length, power):
        """``T.Spectrogram`` matches librosa's ``_spectrogram``."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        waveform = noise.to(self.device, self.dtype)

        # librosa works on the first channel as a NumPy array on the CPU.
        expected = librosa.core.spectrum._spectrogram(
            y=waveform[0].cpu().numpy(),
            n_fft=n_fft,
            hop_length=hop_length,
            power=power,
        )[0]

        transform = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power)
        result = transform.to(self.device, self.dtype)(waveform)[0]
        self.assertEqual(result, torch.from_numpy(expected), atol=1e-5, rtol=1e-5)

    def test_Spectrogram_complex(self):
        """Magnitude of a complex ``T.Spectrogram`` matches librosa with ``power=1``."""
        n_fft = 400
        hop_length = 200
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        waveform = noise.to(self.device, self.dtype)

        expected = librosa.core.spectrum._spectrogram(
            y=waveform[0].cpu().numpy(),
            n_fft=n_fft,
            hop_length=hop_length,
            power=1,
        )[0]

        transform = T.Spectrogram(
            n_fft=n_fft, hop_length=hop_length, power=None, return_complex=True)
        result = transform.to(self.device, self.dtype)(waveform)[0]
        self.assertEqual(result.abs(), torch.from_numpy(expected), atol=1e-5, rtol=1e-5)

    @nested_params(
        [
            param(n_fft=400, hop_length=200, n_mels=64),
            param(n_fft=600, hop_length=100, n_mels=128),
            param(n_fft=200, hop_length=50, n_mels=32),
        ],
        [param(norm=norm) for norm in [None, 'slaney']],
        [param(mel_scale=mel_scale) for mel_scale in ['htk', 'slaney']],
    )
    def test_MelSpectrogram(self, n_fft, hop_length, n_mels, norm, mel_scale):
        """``T.MelSpectrogram`` matches ``librosa.feature.melspectrogram``."""
        sample_rate = 16000
        tone = get_sinusoid(sample_rate=sample_rate, n_channels=1)
        waveform = tone.to(self.device, self.dtype)

        # librosa selects the HTK formula through a boolean flag.
        use_htk = mel_scale == "htk"
        expected = librosa.feature.melspectrogram(
            y=waveform[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            norm=norm,
            htk=use_htk,
        )

        transform = T.MelSpectrogram(
            sample_rate=sample_rate,
            window_fn=torch.hann_window,
            hop_length=hop_length,
            n_mels=n_mels,
            n_fft=n_fft,
            norm=norm,
            mel_scale=mel_scale,
        )
        result = transform.to(self.device, self.dtype)(waveform)[0]
        self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5)

    def test_magnitude_to_db(self):
        """``T.AmplitudeToDB('magnitude', 80.)`` matches ``librosa`` ``amplitude_to_db``."""
        raw = get_spectrogram(get_whitenoise(), n_fft=400, power=2)
        spectrogram = raw.to(self.device, self.dtype)

        transform = T.AmplitudeToDB('magnitude', 80.)
        result = transform.to(self.device, self.dtype)(spectrogram)[0]
        expected = librosa.core.spectrum.amplitude_to_db(spectrogram[0].cpu().numpy())
        self.assertEqual(result, torch.from_numpy(expected))

    def test_power_to_db(self):
        """``T.AmplitudeToDB('power', 80.)`` matches ``librosa`` ``power_to_db``."""
        raw = get_spectrogram(get_whitenoise(), n_fft=400, power=2)
        spectrogram = raw.to(self.device, self.dtype)

        transform = T.AmplitudeToDB('power', 80.)
        result = transform.to(self.device, self.dtype)(spectrogram)[0]
        expected = librosa.core.spectrum.power_to_db(spectrogram[0].cpu().numpy())
        self.assertEqual(result, torch.from_numpy(expected))

    @nested_params([
        param(n_fft=400, hop_length=200, n_mels=64, n_mfcc=40),
        param(n_fft=600, hop_length=100, n_mels=128, n_mfcc=20),
        param(n_fft=200, hop_length=50, n_mels=32, n_mfcc=25),
    ])
    def test_mfcc(self, n_fft, hop_length, n_mels, n_mfcc):
        """``T.MFCC`` matches librosa's MFCC of a dB-scaled mel spectrogram."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        waveform = noise.to(self.device, self.dtype)

        transform = T.MFCC(
            sample_rate=sample_rate,
            n_mfcc=n_mfcc,
            norm='ortho',
            melkwargs={'hop_length': hop_length, 'n_fft': n_fft, 'n_mels': n_mels},
        )
        result = transform.to(self.device, self.dtype)(waveform)[0]

        # Reference: mel spectrogram -> power_to_db -> MFCC, all computed by librosa.
        melspec = librosa.feature.melspectrogram(
            y=waveform[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            htk=True,
            norm=None,
        )
        melspec_db = librosa.core.spectrum.power_to_db(melspec)
        expected = librosa.feature.mfcc(
            S=melspec_db, n_mfcc=n_mfcc, dct_type=2, norm='ortho')
        self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5)

    @parameterized.expand([
        param(n_fft=400, hop_length=200),
        param(n_fft=600, hop_length=100),
        param(n_fft=200, hop_length=50),
    ])
    def test_spectral_centroid(self, n_fft, hop_length):
        """``T.SpectralCentroid`` matches ``librosa.feature.spectral_centroid``."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        waveform = noise.to(self.device, self.dtype)

        transform = T.SpectralCentroid(
            sample_rate=sample_rate, n_fft=n_fft, hop_length=hop_length)
        result = transform.to(self.device, self.dtype)(waveform)
        expected = librosa.feature.spectral_centroid(
            y=waveform[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            hop_length=hop_length,
        )
        self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5)
test/torchaudio_unittest/transforms/sox_compatibility_test.py
0 → 100644
View file @
9dcc7a15
import
warnings
import
torch
import
torchaudio.transforms
as
T
from
parameterized
import
parameterized
from
torchaudio_unittest.common_utils
import
(
skipIfNoSox
,
skipIfNoExec
,
TempDirMixin
,
TorchaudioTestCase
,
get_asset_path
,
sox_utils
,
load_wav
,
save_wav
,
get_whitenoise
,
)
@skipIfNoSox
@skipIfNoExec('sox')
class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase):
    """Compares torchaudio transforms against the output of the ``sox`` effects."""

    def run_sox_effect(self, input_file, effect):
        """Apply ``effect`` to ``input_file`` via ``sox_utils`` and load the output file.

        Every element of ``effect`` is converted with ``str`` before being passed on.
        """
        output_file = self.get_temp_path('expected.wav')
        effect_args = list(map(str, effect))
        sox_utils.run_sox_effect(input_file, output_file, effect_args)
        return load_wav(output_file)

    def assert_sox_effect(self, result, input_path, effects, atol=1e-04, rtol=1e-5):
        """Assert that ``result`` matches ``input_path`` processed with ``effects``."""
        expected, _ = self.run_sox_effect(input_path, effects)
        self.assertEqual(result, expected, atol=atol, rtol=rtol)

    def get_whitenoise(self, sample_rate=8000):
        """Generate white noise, save it to a temp wav file, and return ``(noise, path)``."""
        noise = get_whitenoise(sample_rate=sample_rate, duration=3, scale_factor=0.9)
        path = self.get_temp_path("whitenoise.wav")
        save_wav(path, noise, sample_rate)
        return noise, path

    @parameterized.expand([
        ('q', 'quarter_sine'),
        ('h', 'half_sine'),
        ('t', 'linear'),
    ])
    def test_fade(self, fade_shape_sox, fade_shape):
        """``T.Fade`` matches the sox ``fade`` effect for the given shape."""
        fade_in_len = fade_out_len = 44100
        data, path = self.get_whitenoise(sample_rate=44100)
        fader = T.Fade(fade_in_len, fade_out_len, fade_shape)
        result = fader(data)
        self.assert_sox_effect(result, path, ['fade', fade_shape_sox, '1', '0', '1'])

    @parameterized.expand([
        ('amplitude', 1.1),
        ('db', 2),
        ('power', 2),
    ])
    def test_vol(self, gain_type, gain):
        """``T.Vol`` matches the sox ``vol`` effect for the given gain type."""
        data, path = self.get_whitenoise()
        volume = T.Vol(gain, gain_type)
        result = volume(data)
        self.assert_sox_effect(result, path, ['vol', f'{gain}', gain_type])

    @parameterized.expand(['vad-go-stereo-44100.wav', 'vad-go-mono-32000.wav'])
    def test_vad(self, filename):
        """``T.Vad`` matches the sox ``vad`` effect on the given asset file."""
        path = get_asset_path(filename)
        data, sample_rate = load_wav(path)
        vad = T.Vad(sample_rate)
        result = vad(data)
        self.assert_sox_effect(result, path, ['vad'])

    def test_vad_warning(self):
        """vad should throw a warning if input dimension is greater than 2"""
        sample_rate = 41100

        # (input shape, expected number of recorded warnings)
        cases = (
            ((5, 5, sample_rate), 1),
            ((5, sample_rate), 0),
            ((sample_rate,), 0),
        )
        for shape, expected_count in cases:
            data = torch.rand(*shape)
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter("always")
                T.Vad(sample_rate)(data)
            assert len(caught) == expected_count
test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.torchscript_consistency_impl
import
Transforms
,
TransformsFloat32Only
,
TransformsFloat64Only
class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase):
    """Runs the TorchScript consistency tests with float32 tensors on the CPU."""
    device = torch.device('cpu')
    dtype = torch.float32
class TestTransformsFloat64(Transforms, TransformsFloat64Only, PytorchTestCase):
    """Runs the TorchScript consistency tests with float64 tensors on the CPU."""
    device = torch.device('cpu')
    dtype = torch.float64
test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
skipIfNoCuda
,
PytorchTestCase
from
.torchscript_consistency_impl
import
Transforms
,
TransformsFloat32Only
,
TransformsFloat64Only
@skipIfNoCuda
class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase):
    """Runs the TorchScript consistency tests with float32 tensors on a CUDA device."""
    device = torch.device('cuda')
    dtype = torch.float32
@skipIfNoCuda
class TestTransformsFloat64(Transforms, TransformsFloat64Only, PytorchTestCase):
    """Runs the TorchScript consistency tests with float64 tensors on a CUDA device."""
    device = torch.device('cuda')
    dtype = torch.float64
test/torchaudio_unittest/transforms/torchscript_consistency_impl.py
0 → 100644
View file @
9dcc7a15
"""Test suites for jit-ability and its numerical compatibility"""
import
torch
import
torchaudio.transforms
as
T
from
parameterized
import
parameterized
from
torchaudio_unittest
import
common_utils
from
torchaudio_unittest.common_utils
import
(
skipIfRocm
,
TestBaseMixin
,
torch_script
,
)
class
Transforms
(
TestBaseMixin
):
"""Implements test for Transforms that are performed for different devices"""
def _assert_consistency(self, transform, tensor, *args):
    """Assert that the scripted ``transform`` returns the same output as the eager one.

    ``tensor`` and ``transform`` are first moved to ``self.device`` / ``self.dtype``;
    ``*args`` are forwarded unchanged to both calls.
    """
    placement = {'device': self.device, 'dtype': self.dtype}
    tensor = tensor.to(**placement)
    transform = transform.to(**placement)
    scripted = torch_script(transform)

    # The eager call runs before the scripted call.
    eager_output = transform(tensor, *args)
    scripted_output = scripted(tensor, *args)
    self.assertEqual(scripted_output, eager_output)
def _assert_consistency_complex(self, transform, tensor, test_pseudo_complex=False, *args):
    """Assert scripted/eager agreement for a transform that takes a complex tensor.

    ``tensor`` must be complex. It is cast to ``self.complex_dtype``, while
    ``transform`` is cast to ``self.dtype``. With ``test_pseudo_complex`` the input
    is passed as its ``torch.view_as_real`` view instead.
    """
    assert tensor.is_complex()
    tensor = tensor.to(device=self.device, dtype=self.complex_dtype)
    transform = transform.to(device=self.device, dtype=self.dtype)
    scripted = torch_script(transform)

    if test_pseudo_complex:
        tensor = torch.view_as_real(tensor)

    # The eager call runs before the scripted call.
    eager_output = transform(tensor, *args)
    scripted_output = scripted(tensor, *args)
    self.assertEqual(scripted_output, eager_output)
def test_Spectrogram(self):
    """Scripted and eager ``T.Spectrogram`` agree on random input."""
    waveform = torch.rand((1, 1000))
    self._assert_consistency(T.Spectrogram(), waveform)
def test_Spectrogram_return_complex(self):
    """Scripted and eager ``T.Spectrogram`` agree when returning a complex tensor."""
    waveform = torch.rand((1, 1000))
    complex_spectrogram = T.Spectrogram(power=None, return_complex=True)
    self._assert_consistency(complex_spectrogram, waveform)
def test_InverseSpectrogram(self):
    """Scripted and eager ``T.InverseSpectrogram`` agree on a complex spectrogram."""
    noise = common_utils.get_whitenoise(sample_rate=8000)
    spec = common_utils.get_spectrogram(noise, n_fft=400, hop_length=100)
    inverse = T.InverseSpectrogram(n_fft=400, hop_length=100)
    self._assert_consistency_complex(inverse, spec)
def test_InverseSpectrogram_pseudocomplex(self):
    """Scripted and eager ``T.InverseSpectrogram`` agree on a real-view spectrogram."""
    noise = common_utils.get_whitenoise(sample_rate=8000)
    spec = common_utils.get_spectrogram(noise, n_fft=400, hop_length=100)
    # Pass the complex spectrogram as its ``view_as_real`` representation.
    real_view = torch.view_as_real(spec)
    inverse = T.InverseSpectrogram(n_fft=400, hop_length=100)
    self._assert_consistency(inverse, real_view)
@
skipIfRocm
def
test_GriffinLim
(
self
):
tensor
=
torch
.
rand
((
1
,
201
,
6
))
self
.
_assert_consistency
(
T
.
GriffinLim
(
length
=
1000
,
rand_init
=
False
),
tensor
)
def
test_AmplitudeToDB
(
self
):
spec
=
torch
.
rand
((
6
,
201
))
self
.
_assert_consistency
(
T
.
AmplitudeToDB
(),
spec
)
def
test_MelScale
(
self
):
spec_f
=
torch
.
rand
((
1
,
201
,
6
))
self
.
_assert_consistency
(
T
.
MelScale
(
n_stft
=
201
),
spec_f
)
def
test_MelSpectrogram
(
self
):
tensor
=
torch
.
rand
((
1
,
1000
))
self
.
_assert_consistency
(
T
.
MelSpectrogram
(),
tensor
)
def
test_MFCC
(
self
):
tensor
=
torch
.
rand
((
1
,
1000
))
self
.
_assert_consistency
(
T
.
MFCC
(),
tensor
)
def
test_LFCC
(
self
):
tensor
=
torch
.
rand
((
1
,
1000
))
self
.
_assert_consistency
(
T
.
LFCC
(),
tensor
)
def
test_Resample
(
self
):
sr1
,
sr2
=
16000
,
8000
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
sr1
)
self
.
_assert_consistency
(
T
.
Resample
(
sr1
,
sr2
),
tensor
)
def
test_ComplexNorm
(
self
):
tensor
=
torch
.
rand
((
1
,
2
,
201
,
2
))
self
.
_assert_consistency
(
T
.
ComplexNorm
(),
tensor
)
def
test_MuLawEncoding
(
self
):
tensor
=
common_utils
.
get_whitenoise
()
self
.
_assert_consistency
(
T
.
MuLawEncoding
(),
tensor
)
def
test_MuLawDecoding
(
self
):
tensor
=
torch
.
rand
((
1
,
10
))
self
.
_assert_consistency
(
T
.
MuLawDecoding
(),
tensor
)
def
test_Fade
(
self
):
waveform
=
common_utils
.
get_whitenoise
()
fade_in_len
=
3000
fade_out_len
=
3000
self
.
_assert_consistency
(
T
.
Fade
(
fade_in_len
,
fade_out_len
),
waveform
)
def
test_FrequencyMasking
(
self
):
tensor
=
torch
.
rand
((
10
,
2
,
50
,
10
,
2
))
self
.
_assert_consistency
(
T
.
FrequencyMasking
(
freq_mask_param
=
60
,
iid_masks
=
False
),
tensor
)
def
test_TimeMasking
(
self
):
tensor
=
torch
.
rand
((
10
,
2
,
50
,
10
,
2
))
self
.
_assert_consistency
(
T
.
TimeMasking
(
time_mask_param
=
30
,
iid_masks
=
False
),
tensor
)
def
test_Vol
(
self
):
waveform
=
common_utils
.
get_whitenoise
()
self
.
_assert_consistency
(
T
.
Vol
(
1.1
),
waveform
)
def
test_SlidingWindowCmn
(
self
):
tensor
=
torch
.
rand
((
1000
,
10
))
self
.
_assert_consistency
(
T
.
SlidingWindowCmn
(),
tensor
)
def
test_Vad
(
self
):
filepath
=
common_utils
.
get_asset_path
(
"vad-go-mono-32000.wav"
)
waveform
,
sample_rate
=
common_utils
.
load_wav
(
filepath
)
self
.
_assert_consistency
(
T
.
Vad
(
sample_rate
=
sample_rate
),
waveform
)
def
test_SpectralCentroid
(
self
):
sample_rate
=
44100
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
sample_rate
)
self
.
_assert_consistency
(
T
.
SpectralCentroid
(
sample_rate
=
sample_rate
),
waveform
)
@
parameterized
.
expand
([(
True
,
),
(
False
,
)])
def
test_TimeStretch
(
self
,
test_pseudo_complex
):
n_freq
=
400
hop_length
=
512
fixed_rate
=
1.3
tensor
=
torch
.
view_as_complex
(
torch
.
rand
((
10
,
2
,
n_freq
,
10
,
2
)))
self
.
_assert_consistency_complex
(
T
.
TimeStretch
(
n_freq
=
n_freq
,
hop_length
=
hop_length
,
fixed_rate
=
fixed_rate
),
tensor
,
test_pseudo_complex
)
def
test_PitchShift
(
self
):
sample_rate
=
8000
n_steps
=
4
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
sample_rate
)
self
.
_assert_consistency
(
T
.
PitchShift
(
sample_rate
=
sample_rate
,
n_steps
=
n_steps
),
waveform
)
def
test_PSD
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
spectrogram
.
to
(
self
.
device
)
self
.
_assert_consistency_complex
(
T
.
PSD
(),
spectrogram
)
def
test_PSD_with_mask
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
spectrogram
.
to
(
self
.
device
)
mask
=
torch
.
rand
(
spectrogram
.
shape
[
-
2
:],
device
=
self
.
device
)
self
.
_assert_consistency_complex
(
T
.
PSD
(),
spectrogram
,
False
,
mask
)
class TransformsFloat32Only(TestBaseMixin):
    # Holds the RNNTLoss check. Reads `self.device` here and `self.dtype` via
    # `_assert_consistency`; neither attribute nor that helper is defined in
    # this class.

    def test_rnnt_loss(self):
        # Fixed logits with nested shape (1, 2, 3, 5).
        logits = torch.tensor(
            [
                [
                    [
                        [0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.1, 0.6, 0.1, 0.1],
                        [0.1, 0.1, 0.2, 0.8, 0.1],
                    ],
                    [
                        [0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.1, 0.2, 0.1, 0.1],
                        [0.7, 0.1, 0.2, 0.1, 0.1],
                    ],
                ]
            ]
        )
        # The float32 copy is only used to obtain the device on which the
        # integer tensors are created.
        placed = logits.to(device=self.device, dtype=torch.float32)
        int_opts = dict(device=placed.device, dtype=torch.int32)
        targets = torch.tensor([[1, 2]], **int_opts)
        logit_lengths = torch.tensor([2], **int_opts)
        target_lengths = torch.tensor([2], **int_opts)

        loss = T.RNNTLoss()
        # The original `logits` tensor is what gets passed on; the helper is
        # called with it and the three int32 tensors as extra arguments.
        self._assert_consistency(loss, logits, targets, logit_lengths, target_lengths)
class TransformsFloat64Only(TestBaseMixin):
    # Holds the MVDR check. Reads `self.device` and calls
    # `_assert_consistency_complex`; neither is defined in this class.

    @parameterized.expand(
        [
            ["ref_channel", True],
            ["stv_evd", True],
            ["stv_power", True],
            ["ref_channel", False],
            ["stv_evd", False],
            ["stv_power", False],
        ]
    )
    def test_MVDR(self, solution, online):
        # Four-channel noise turned into a spectrogram, then cast to cdouble
        # on the test device.
        noise = common_utils.get_whitenoise(sample_rate=8000, n_channels=4)
        spec = common_utils.get_spectrogram(noise, n_fft=400, hop_length=100)
        spec = spec.to(device=self.device, dtype=torch.cdouble)

        # Two independent random masks shaped like the last two dimensions of
        # the spectrogram; they are drawn in this order.
        mask_shape = spec.shape[-2:]
        mask_s = torch.rand(mask_shape, device=self.device)
        mask_n = torch.rand(mask_shape, device=self.device)

        beamformer = T.MVDR(solution=solution, online=online)
        # False is passed explicitly for test_pseudo_complex so that both
        # masks are forwarded through *args.
        self._assert_consistency_complex(beamformer, spec, False, mask_s, mask_n)
test/torchaudio_unittest/transforms/transforms_cpu_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
from
.
transforms_test_impl
import
TransformsTestBase
class TransformsCPUFloat32Test(TransformsTestBase, PytorchTestCase):
    # Binds the tests inherited from TransformsTestBase to CPU / float32.
    dtype = torch.float32
    device = "cpu"
class TransformsCPUFloat64Test(TransformsTestBase, PytorchTestCase):
    # Binds the tests inherited from TransformsTestBase to CPU / float64.
    dtype = torch.float64
    device = "cpu"
test/torchaudio_unittest/transforms/transforms_cuda_test.py
0 → 100644
View file @
9dcc7a15
import
torch
from
torchaudio_unittest.common_utils
import
(
PytorchTestCase
,
skipIfNoCuda
,
)
from
.
transforms_test_impl
import
TransformsTestBase
@skipIfNoCuda
class TransformsCUDAFloat32Test(TransformsTestBase, PytorchTestCase):
    # Binds the tests inherited from TransformsTestBase to CUDA / float32.
    # The whole class is wrapped in the skipIfNoCuda decorator.
    dtype = torch.float32
    device = "cuda"
@skipIfNoCuda
class TransformsCUDAFloat64Test(TransformsTestBase, PytorchTestCase):
    # Binds the tests inherited from TransformsTestBase to CUDA / float64.
    # The whole class is wrapped in the skipIfNoCuda decorator.
    dtype = torch.float64
    device = "cuda"
Prev
1
…
12
13
14
15
16
17
18
19
20
21
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment