Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
ffeba11a
Commit
ffeba11a
authored
Sep 02, 2024
by
mayp777
Browse files
UPDATE
parent
29deb085
Changes
337
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1057 additions
and
405 deletions
+1057
-405
test/torchaudio_unittest/sox_effect/sox_effect_test.py
test/torchaudio_unittest/sox_effect/sox_effect_test.py
+0
-143
test/torchaudio_unittest/transforms/autograd_cuda_test.py
test/torchaudio_unittest/transforms/autograd_cuda_test.py
+2
-1
test/torchaudio_unittest/transforms/autograd_test_impl.py
test/torchaudio_unittest/transforms/autograd_test_impl.py
+63
-5
test/torchaudio_unittest/transforms/batch_consistency_test.py
.../torchaudio_unittest/transforms/batch_consistency_test.py
+121
-4
test/torchaudio_unittest/transforms/librosa_compatibility_cuda_test.py
...io_unittest/transforms/librosa_compatibility_cuda_test.py
+2
-1
test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py
...io_unittest/transforms/librosa_compatibility_test_impl.py
+2
-2
test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py
..._unittest/transforms/torchscript_consistency_cuda_test.py
+2
-1
test/torchaudio_unittest/transforms/torchscript_consistency_impl.py
...audio_unittest/transforms/torchscript_consistency_impl.py
+89
-3
test/torchaudio_unittest/transforms/transforms_cuda_test.py
test/torchaudio_unittest/transforms/transforms_cuda_test.py
+2
-1
test/torchaudio_unittest/transforms/transforms_test.py
test/torchaudio_unittest/transforms/transforms_test.py
+3
-5
test/torchaudio_unittest/transforms/transforms_test_impl.py
test/torchaudio_unittest/transforms/transforms_test_impl.py
+324
-4
test/torchaudio_unittest/utils/ffmpeg_utils_test.py
test/torchaudio_unittest/utils/ffmpeg_utils_test.py
+14
-0
third_party/LICENSES_BUNDLED.txt
third_party/LICENSES_BUNDLED.txt
+11
-0
third_party/ffmpeg/multi/CMakeLists.txt
third_party/ffmpeg/multi/CMakeLists.txt
+208
-0
third_party/ffmpeg/single/CMakeLists.txt
third_party/ffmpeg/single/CMakeLists.txt
+40
-0
third_party/sox/CMakeLists.txt
third_party/sox/CMakeLists.txt
+18
-209
third_party/sox/stub.c
third_party/sox/stub.c
+85
-0
tools/release_notes/classify_prs.py
tools/release_notes/classify_prs.py
+4
-2
tools/setup_helpers/extension.py
tools/setup_helpers/extension.py
+45
-22
torchaudio/__init__.py
torchaudio/__init__.py
+22
-2
No files found.
Too many changes to show.
To preserve performance only
337 of 337+
files are displayed.
Plain diff
Email patch
test/torchaudio_unittest/sox_effect/sox_effect_test.py
View file @
ffeba11a
import
io
import
itertools
import
itertools
import
tarfile
from
pathlib
import
Path
from
pathlib
import
Path
from
parameterized
import
parameterized
from
parameterized
import
parameterized
from
torchaudio
import
sox_effects
from
torchaudio
import
sox_effects
from
torchaudio._internal
import
module_utils
as
_mod_utils
from
torchaudio_unittest.common_utils
import
(
from
torchaudio_unittest.common_utils
import
(
get_sinusoid
,
get_sinusoid
,
get_wav_data
,
get_wav_data
,
HttpServerMixin
,
load_wav
,
load_wav
,
PytorchTestCase
,
PytorchTestCase
,
save_wav
,
save_wav
,
skipIfNoExec
,
skipIfNoModule
,
skipIfNoSox
,
skipIfNoSox
,
sox_utils
,
sox_utils
,
TempDirMixin
,
TempDirMixin
,
...
@@ -23,10 +17,6 @@ from torchaudio_unittest.common_utils import (
...
@@ -23,10 +17,6 @@ from torchaudio_unittest.common_utils import (
from
.common
import
load_params
,
name_func
from
.common
import
load_params
,
name_func
if
_mod_utils
.
is_module_available
(
"requests"
):
import
requests
@
skipIfNoSox
@
skipIfNoSox
class
TestSoxEffects
(
PytorchTestCase
):
class
TestSoxEffects
(
PytorchTestCase
):
def
test_init
(
self
):
def
test_init
(
self
):
...
@@ -241,136 +231,3 @@ class TestFileFormats(TempDirMixin, PytorchTestCase):
...
@@ -241,136 +231,3 @@ class TestFileFormats(TempDirMixin, PytorchTestCase):
assert
sr
==
expected_sr
assert
sr
==
expected_sr
self
.
assertEqual
(
found
,
expected
)
self
.
assertEqual
(
found
,
expected
)
@
skipIfNoExec
(
"sox"
)
@
skipIfNoSox
class
TestFileObject
(
TempDirMixin
,
PytorchTestCase
):
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
(
"flac"
,
0
),
(
"flac"
,
5
),
(
"flac"
,
8
),
(
"vorbis"
,
-
1
),
(
"vorbis"
,
10
),
(
"amb"
,
None
),
]
)
def
test_fileobj
(
self
,
ext
,
compression
):
"""Applying effects via file object works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
input_path
=
self
.
get_temp_path
(
f
"input.
{
ext
}
"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
sox_utils
.
gen_audio_file
(
input_path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
open
(
input_path
,
"rb"
)
as
fileobj
:
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
self
.
assertEqual
(
found
,
expected
)
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
(
"flac"
,
0
),
(
"flac"
,
5
),
(
"flac"
,
8
),
(
"vorbis"
,
-
1
),
(
"vorbis"
,
10
),
(
"amb"
,
None
),
]
)
def
test_bytesio
(
self
,
ext
,
compression
):
"""Applying effects via BytesIO object works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
input_path
=
self
.
get_temp_path
(
f
"input.
{
ext
}
"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
sox_utils
.
gen_audio_file
(
input_path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
open
(
input_path
,
"rb"
)
as
file_
:
fileobj
=
io
.
BytesIO
(
file_
.
read
())
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
self
.
assertEqual
(
found
,
expected
)
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
(
"flac"
,
0
),
(
"flac"
,
5
),
(
"flac"
,
8
),
(
"vorbis"
,
-
1
),
(
"vorbis"
,
10
),
(
"amb"
,
None
),
]
)
def
test_tarfile
(
self
,
ext
,
compression
):
"""Applying effects to compressed audio via file-like file works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
audio_file
=
f
"input.
{
ext
}
"
input_path
=
self
.
get_temp_path
(
audio_file
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
archive_path
=
self
.
get_temp_path
(
"archive.tar.gz"
)
sox_utils
.
gen_audio_file
(
input_path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
tarfile
.
TarFile
(
archive_path
,
"w"
)
as
tarobj
:
tarobj
.
add
(
input_path
,
arcname
=
audio_file
)
with
tarfile
.
TarFile
(
archive_path
,
"r"
)
as
tarobj
:
fileobj
=
tarobj
.
extractfile
(
audio_file
)
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
self
.
assertEqual
(
found
,
expected
)
@
skipIfNoSox
@
skipIfNoExec
(
"sox"
)
@
skipIfNoModule
(
"requests"
)
class
TestFileObjectHttp
(
HttpServerMixin
,
PytorchTestCase
):
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
(
"flac"
,
0
),
(
"flac"
,
5
),
(
"flac"
,
8
),
(
"vorbis"
,
-
1
),
(
"vorbis"
,
10
),
(
"amb"
,
None
),
]
)
def
test_requests
(
self
,
ext
,
compression
):
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
audio_file
=
f
"input.
{
ext
}
"
input_path
=
self
.
get_temp_path
(
audio_file
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
sox_utils
.
gen_audio_file
(
input_path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
url
=
self
.
get_url
(
audio_file
)
with
requests
.
get
(
url
,
stream
=
True
)
as
resp
:
found
,
sr
=
sox_effects
.
apply_effects_file
(
resp
.
raw
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
self
.
assertEqual
(
found
,
expected
)
test/torchaudio_unittest/transforms/autograd_cuda_test.py
View file @
ffeba11a
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
,
skipIfkmeMark
from
.autograd_test_impl
import
AutogradTestFloat32
,
AutogradTestMixin
from
.autograd_test_impl
import
AutogradTestFloat32
,
AutogradTestMixin
@
skipIfNoCuda
@
skipIfNoCuda
@
skipIfkmeMark
class
AutogradCUDATest
(
AutogradTestMixin
,
PytorchTestCase
):
class
AutogradCUDATest
(
AutogradTestMixin
,
PytorchTestCase
):
device
=
"cuda"
device
=
"cuda"
...
...
test/torchaudio_unittest/transforms/autograd_test_impl.py
View file @
ffeba11a
...
@@ -28,6 +28,7 @@ class AutogradTestMixin(TestBaseMixin):
...
@@ -28,6 +28,7 @@ class AutogradTestMixin(TestBaseMixin):
inputs
:
List
[
torch
.
Tensor
],
inputs
:
List
[
torch
.
Tensor
],
*
,
*
,
nondet_tol
:
float
=
0.0
,
nondet_tol
:
float
=
0.0
,
enable_all_grad
:
bool
=
True
,
):
):
transform
=
transform
.
to
(
dtype
=
torch
.
float64
,
device
=
self
.
device
)
transform
=
transform
.
to
(
dtype
=
torch
.
float64
,
device
=
self
.
device
)
...
@@ -37,7 +38,8 @@ class AutogradTestMixin(TestBaseMixin):
...
@@ -37,7 +38,8 @@ class AutogradTestMixin(TestBaseMixin):
for
i
in
inputs
:
for
i
in
inputs
:
if
torch
.
is_tensor
(
i
):
if
torch
.
is_tensor
(
i
):
i
=
i
.
to
(
dtype
=
torch
.
cdouble
if
i
.
is_complex
()
else
torch
.
double
,
device
=
self
.
device
)
i
=
i
.
to
(
dtype
=
torch
.
cdouble
if
i
.
is_complex
()
else
torch
.
double
,
device
=
self
.
device
)
i
.
requires_grad
=
True
if
enable_all_grad
:
i
.
requires_grad
=
True
inputs_
.
append
(
i
)
inputs_
.
append
(
i
)
assert
gradcheck
(
transform
,
inputs_
)
assert
gradcheck
(
transform
,
inputs_
)
assert
gradgradcheck
(
transform
,
inputs_
,
nondet_tol
=
nondet_tol
)
assert
gradgradcheck
(
transform
,
inputs_
,
nondet_tol
=
nondet_tol
)
...
@@ -110,14 +112,14 @@ class AutogradTestMixin(TestBaseMixin):
...
@@ -110,14 +112,14 @@ class AutogradTestMixin(TestBaseMixin):
sample_rate
=
8000
sample_rate
=
8000
transform
=
T
.
MFCC
(
sample_rate
=
sample_rate
,
log_mels
=
log_mels
)
transform
=
T
.
MFCC
(
sample_rate
=
sample_rate
,
log_mels
=
log_mels
)
waveform
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
)
waveform
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
)
self
.
assert_grad
(
transform
,
[
waveform
])
self
.
assert_grad
(
transform
,
[
waveform
]
,
nondet_tol
=
1e-10
)
@
parameterized
.
expand
([(
False
,),
(
True
,)])
@
parameterized
.
expand
([(
False
,),
(
True
,)])
def
test_lfcc
(
self
,
log_lf
):
def
test_lfcc
(
self
,
log_lf
):
sample_rate
=
8000
sample_rate
=
8000
transform
=
T
.
LFCC
(
sample_rate
=
sample_rate
,
log_lf
=
log_lf
)
transform
=
T
.
LFCC
(
sample_rate
=
sample_rate
,
log_lf
=
log_lf
)
waveform
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
)
waveform
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
)
self
.
assert_grad
(
transform
,
[
waveform
])
self
.
assert_grad
(
transform
,
[
waveform
]
,
nondet_tol
=
1e-10
)
def
test_compute_deltas
(
self
):
def
test_compute_deltas
(
self
):
transform
=
T
.
ComputeDeltas
()
transform
=
T
.
ComputeDeltas
()
...
@@ -187,8 +189,9 @@ class AutogradTestMixin(TestBaseMixin):
...
@@ -187,8 +189,9 @@ class AutogradTestMixin(TestBaseMixin):
def
test_melscale
(
self
):
def
test_melscale
(
self
):
sample_rate
=
8000
sample_rate
=
8000
n_fft
=
400
n_fft
=
400
n_mels
=
n_fft
//
2
+
1
n_stft
=
n_fft
//
2
+
1
transform
=
T
.
MelScale
(
sample_rate
=
sample_rate
,
n_mels
=
n_mels
)
n_mels
=
128
transform
=
T
.
MelScale
(
sample_rate
=
sample_rate
,
n_mels
=
n_mels
,
n_stft
=
n_stft
)
spec
=
get_spectrogram
(
spec
=
get_spectrogram
(
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
),
n_fft
=
n_fft
,
power
=
1
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.05
,
n_channels
=
2
),
n_fft
=
n_fft
,
power
=
1
)
)
...
@@ -317,6 +320,61 @@ class AutogradTestMixin(TestBaseMixin):
...
@@ -317,6 +320,61 @@ class AutogradTestMixin(TestBaseMixin):
reference_channel
=
0
reference_channel
=
0
self
.
assert_grad
(
transform
,
[
specgram
,
psd_s
,
psd_n
,
reference_channel
])
self
.
assert_grad
(
transform
,
[
specgram
,
psd_s
,
psd_n
,
reference_channel
])
@
nested_params
(
[
"Convolve"
,
"FFTConvolve"
],
[
"full"
,
"valid"
,
"same"
],
)
def
test_convolve
(
self
,
cls
,
mode
):
leading_dims
=
(
4
,
3
,
2
)
L_x
,
L_y
=
23
,
40
x
=
torch
.
rand
(
*
leading_dims
,
L_x
)
y
=
torch
.
rand
(
*
leading_dims
,
L_y
)
convolve
=
getattr
(
T
,
cls
)(
mode
=
mode
)
self
.
assert_grad
(
convolve
,
[
x
,
y
])
def
test_speed
(
self
):
leading_dims
=
(
3
,
2
)
time
=
200
waveform
=
torch
.
rand
(
*
leading_dims
,
time
,
requires_grad
=
True
)
lengths
=
torch
.
randint
(
1
,
time
,
leading_dims
)
speed
=
T
.
Speed
(
1000
,
1.1
)
self
.
assert_grad
(
speed
,
(
waveform
,
lengths
),
enable_all_grad
=
False
)
def
test_speed_perturbation
(
self
):
leading_dims
=
(
3
,
2
)
time
=
200
waveform
=
torch
.
rand
(
*
leading_dims
,
time
,
requires_grad
=
True
)
lengths
=
torch
.
randint
(
1
,
time
,
leading_dims
)
speed
=
T
.
SpeedPerturbation
(
1000
,
[
0.9
])
self
.
assert_grad
(
speed
,
(
waveform
,
lengths
),
enable_all_grad
=
False
)
@
nested_params
([
True
,
False
])
def
test_add_noise
(
self
,
use_lengths
):
leading_dims
=
(
2
,
3
)
L
=
31
waveform
=
torch
.
rand
(
*
leading_dims
,
L
)
noise
=
torch
.
rand
(
*
leading_dims
,
L
)
if
use_lengths
:
lengths
=
torch
.
rand
(
*
leading_dims
)
else
:
lengths
=
None
snr
=
torch
.
rand
(
*
leading_dims
)
add_noise
=
T
.
AddNoise
()
self
.
assert_grad
(
add_noise
,
(
waveform
,
noise
,
snr
,
lengths
))
def
test_preemphasis
(
self
):
waveform
=
torch
.
rand
(
3
,
4
,
10
)
preemphasis
=
T
.
Preemphasis
(
coeff
=
0.97
)
self
.
assert_grad
(
preemphasis
,
(
waveform
,))
def
test_deemphasis
(
self
):
waveform
=
torch
.
rand
(
3
,
4
,
10
)
deemphasis
=
T
.
Deemphasis
(
coeff
=
0.97
)
self
.
assert_grad
(
deemphasis
,
(
waveform
,))
class
AutogradTestFloat32
(
TestBaseMixin
):
class
AutogradTestFloat32
(
TestBaseMixin
):
def
assert_grad
(
def
assert_grad
(
...
...
test/torchaudio_unittest/transforms/batch_consistency_test.py
View file @
ffeba11a
...
@@ -52,11 +52,9 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -52,11 +52,9 @@ class TestTransforms(common_utils.TorchaudioTestCase):
n_mels
=
32
n_mels
=
32
n_stft
=
5
n_stft
=
5
mel_spec
=
torch
.
randn
(
3
,
2
,
n_mels
,
32
)
**
2
mel_spec
=
torch
.
randn
(
3
,
2
,
n_mels
,
32
)
**
2
transform
=
T
.
InverseMelScale
(
n_stft
,
n_mels
)
transform
=
T
.
InverseMelScale
(
n_stft
,
n_mels
,
driver
=
"gelsd"
)
# Because InverseMelScale runs SGD on randomly initialized values so they do not yield
self
.
assert_batch_consistency
(
transform
,
mel_spec
)
# exactly same result. For this reason, tolerance is very relaxed here.
self
.
assert_batch_consistency
(
transform
,
mel_spec
,
atol
=
1.0
,
rtol
=
1e-5
)
def
test_batch_compute_deltas
(
self
):
def
test_batch_compute_deltas
(
self
):
specgram
=
torch
.
randn
(
3
,
2
,
31
,
2786
)
specgram
=
torch
.
randn
(
3
,
2
,
31
,
2786
)
...
@@ -257,3 +255,122 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -257,3 +255,122 @@ class TestTransforms(common_utils.TorchaudioTestCase):
computed
=
transform
(
specgram
,
psd_s
,
psd_n
,
reference_channel
)
computed
=
transform
(
specgram
,
psd_s
,
psd_n
,
reference_channel
)
self
.
assertEqual
(
computed
,
expected
)
self
.
assertEqual
(
computed
,
expected
)
@
common_utils
.
nested_params
(
[
"Convolve"
,
"FFTConvolve"
],
[
"full"
,
"valid"
,
"same"
],
)
def
test_convolve
(
self
,
cls
,
mode
):
leading_dims
=
(
2
,
3
)
L_x
,
L_y
=
89
,
43
x
=
torch
.
rand
(
*
leading_dims
,
L_x
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
y
=
torch
.
rand
(
*
leading_dims
,
L_y
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
convolve
=
getattr
(
T
,
cls
)(
mode
=
mode
)
actual
=
convolve
(
x
,
y
)
expected
=
torch
.
stack
(
[
torch
.
stack
(
[
convolve
(
x
[
i
,
j
].
unsqueeze
(
0
),
y
[
i
,
j
].
unsqueeze
(
0
)).
squeeze
(
0
)
for
j
in
range
(
leading_dims
[
1
])]
)
for
i
in
range
(
leading_dims
[
0
])
]
)
self
.
assertEqual
(
expected
,
actual
)
def
test_speed
(
self
):
B
=
5
orig_freq
=
100
factor
=
0.8
input_lengths
=
torch
.
randint
(
1
,
1000
,
(
B
,),
dtype
=
torch
.
int32
)
speed
=
T
.
Speed
(
orig_freq
,
factor
)
unbatched_input
=
[
torch
.
ones
((
int
(
length
),))
*
1.0
for
length
in
input_lengths
]
batched_input
=
torch
.
nn
.
utils
.
rnn
.
pad_sequence
(
unbatched_input
,
batch_first
=
True
)
output
,
output_lengths
=
speed
(
batched_input
,
input_lengths
)
unbatched_output
=
[]
unbatched_output_lengths
=
[]
for
idx
in
range
(
len
(
unbatched_input
)):
w
,
l
=
speed
(
unbatched_input
[
idx
],
input_lengths
[
idx
])
unbatched_output
.
append
(
w
)
unbatched_output_lengths
.
append
(
l
)
self
.
assertEqual
(
output_lengths
,
torch
.
stack
(
unbatched_output_lengths
))
for
idx
in
range
(
len
(
unbatched_output
)):
w
,
l
=
output
[
idx
],
output_lengths
[
idx
]
self
.
assertEqual
(
unbatched_output
[
idx
],
w
[:
l
])
def
test_speed_perturbation
(
self
):
B
=
5
orig_freq
=
100
factor
=
0.8
input_lengths
=
torch
.
randint
(
1
,
1000
,
(
B
,),
dtype
=
torch
.
int32
)
speed
=
T
.
SpeedPerturbation
(
orig_freq
,
[
factor
])
unbatched_input
=
[
torch
.
ones
((
int
(
length
),))
*
1.0
for
length
in
input_lengths
]
batched_input
=
torch
.
nn
.
utils
.
rnn
.
pad_sequence
(
unbatched_input
,
batch_first
=
True
)
output
,
output_lengths
=
speed
(
batched_input
,
input_lengths
)
unbatched_output
=
[]
unbatched_output_lengths
=
[]
for
idx
in
range
(
len
(
unbatched_input
)):
w
,
l
=
speed
(
unbatched_input
[
idx
],
input_lengths
[
idx
])
unbatched_output
.
append
(
w
)
unbatched_output_lengths
.
append
(
l
)
self
.
assertEqual
(
output_lengths
,
torch
.
stack
(
unbatched_output_lengths
))
for
idx
in
range
(
len
(
unbatched_output
)):
w
,
l
=
output
[
idx
],
output_lengths
[
idx
]
self
.
assertEqual
(
unbatched_output
[
idx
],
w
[:
l
])
def
test_add_noise
(
self
):
leading_dims
=
(
5
,
2
,
3
)
L
=
51
waveform
=
torch
.
rand
(
*
leading_dims
,
L
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
noise
=
torch
.
rand
(
*
leading_dims
,
L
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
lengths
=
torch
.
rand
(
*
leading_dims
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
snr
=
torch
.
rand
(
*
leading_dims
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
*
10
add_noise
=
T
.
AddNoise
()
actual
=
add_noise
(
waveform
,
noise
,
snr
,
lengths
)
expected
=
[]
for
i
in
range
(
leading_dims
[
0
]):
for
j
in
range
(
leading_dims
[
1
]):
for
k
in
range
(
leading_dims
[
2
]):
expected
.
append
(
add_noise
(
waveform
[
i
][
j
][
k
],
noise
[
i
][
j
][
k
],
snr
[
i
][
j
][
k
],
lengths
[
i
][
j
][
k
]))
self
.
assertEqual
(
torch
.
stack
(
expected
),
actual
.
reshape
(
-
1
,
L
))
def
test_preemphasis
(
self
):
waveform
=
torch
.
rand
((
3
,
5
,
2
,
100
),
dtype
=
self
.
dtype
,
device
=
self
.
device
)
preemphasis
=
T
.
Preemphasis
(
coeff
=
0.97
)
actual
=
preemphasis
(
waveform
)
expected
=
[]
for
i
in
range
(
waveform
.
size
(
0
)):
for
j
in
range
(
waveform
.
size
(
1
)):
for
k
in
range
(
waveform
.
size
(
2
)):
expected
.
append
(
preemphasis
(
waveform
[
i
][
j
][
k
]))
self
.
assertEqual
(
torch
.
stack
(
expected
),
actual
.
reshape
(
-
1
,
waveform
.
size
(
-
1
)))
def
test_deemphasis
(
self
):
waveform
=
torch
.
rand
((
3
,
5
,
2
,
100
),
dtype
=
self
.
dtype
,
device
=
self
.
device
)
deemphasis
=
T
.
Deemphasis
(
coeff
=
0.97
)
actual
=
deemphasis
(
waveform
)
expected
=
[]
for
i
in
range
(
waveform
.
size
(
0
)):
for
j
in
range
(
waveform
.
size
(
1
)):
for
k
in
range
(
waveform
.
size
(
2
)):
expected
.
append
(
deemphasis
(
waveform
[
i
][
j
][
k
]))
self
.
assertEqual
(
torch
.
stack
(
expected
),
actual
.
reshape
(
-
1
,
waveform
.
size
(
-
1
)))
test/torchaudio_unittest/transforms/librosa_compatibility_cuda_test.py
View file @
ffeba11a
import
torch
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
,
skipIfkmeMark
from
.librosa_compatibility_test_impl
import
TransformsTestBase
from
.librosa_compatibility_test_impl
import
TransformsTestBase
@
skipIfNoCuda
@
skipIfNoCuda
@
skipIfkmeMark
class
TestTransforms
(
TransformsTestBase
,
PytorchTestCase
):
class
TestTransforms
(
TransformsTestBase
,
PytorchTestCase
):
dtype
=
torch
.
float64
dtype
=
torch
.
float64
device
=
torch
.
device
(
"cuda"
)
device
=
torch
.
device
(
"cuda"
)
test/torchaudio_unittest/transforms/librosa_compatibility_test_impl.py
View file @
ffeba11a
...
@@ -36,7 +36,7 @@ class TransformsTestBase(TestBaseMixin):
...
@@ -36,7 +36,7 @@ class TransformsTestBase(TestBaseMixin):
result
=
T
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
,).
to
(
self
.
device
,
self
.
dtype
)(
result
=
T
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
,).
to
(
self
.
device
,
self
.
dtype
)(
waveform
waveform
)[
0
]
)[
0
]
self
.
assertEqual
(
result
,
torch
.
from_numpy
(
expected
),
atol
=
1e-
5
,
rtol
=
1e-
5
)
self
.
assertEqual
(
result
,
torch
.
from_numpy
(
expected
),
atol
=
1e-
4
,
rtol
=
1e-
4
)
def
test_Spectrogram_complex
(
self
):
def
test_Spectrogram_complex
(
self
):
n_fft
=
400
n_fft
=
400
...
@@ -54,7 +54,7 @@ class TransformsTestBase(TestBaseMixin):
...
@@ -54,7 +54,7 @@ class TransformsTestBase(TestBaseMixin):
result
=
T
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
None
,
return_complex
=
True
,).
to
(
result
=
T
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
None
,
return_complex
=
True
,).
to
(
self
.
device
,
self
.
dtype
self
.
device
,
self
.
dtype
)(
waveform
)[
0
]
)(
waveform
)[
0
]
self
.
assertEqual
(
result
.
abs
(),
torch
.
from_numpy
(
expected
),
atol
=
1e-
5
,
rtol
=
1e-
5
)
self
.
assertEqual
(
result
.
abs
(),
torch
.
from_numpy
(
expected
),
atol
=
1e-
4
,
rtol
=
1e-
4
)
@
nested_params
(
@
nested_params
(
[
[
...
...
test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py
View file @
ffeba11a
import
torch
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
,
skipIfkmeMark
from
.torchscript_consistency_impl
import
Transforms
,
TransformsFloat32Only
from
.torchscript_consistency_impl
import
Transforms
,
TransformsFloat32Only
...
@@ -11,6 +11,7 @@ class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase):
...
@@ -11,6 +11,7 @@ class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase):
@
skipIfNoCuda
@
skipIfNoCuda
@
skipIfkmeMark
class
TestTransformsFloat64
(
Transforms
,
PytorchTestCase
):
class
TestTransformsFloat64
(
Transforms
,
PytorchTestCase
):
dtype
=
torch
.
float64
dtype
=
torch
.
float64
device
=
torch
.
device
(
"cuda"
)
device
=
torch
.
device
(
"cuda"
)
test/torchaudio_unittest/transforms/torchscript_consistency_impl.py
View file @
ffeba11a
...
@@ -4,7 +4,7 @@ import torch
...
@@ -4,7 +4,7 @@ import torch
import
torchaudio.transforms
as
T
import
torchaudio.transforms
as
T
from
parameterized
import
parameterized
from
parameterized
import
parameterized
from
torchaudio_unittest
import
common_utils
from
torchaudio_unittest
import
common_utils
from
torchaudio_unittest.common_utils
import
skipIfRocm
,
TestBaseMixin
,
torch_script
from
torchaudio_unittest.common_utils
import
skipIfRocm
,
TestBaseMixin
,
torch_script
,
skipIfkmeMark
class
Transforms
(
TestBaseMixin
):
class
Transforms
(
TestBaseMixin
):
...
@@ -38,7 +38,8 @@ class Transforms(TestBaseMixin):
...
@@ -38,7 +38,8 @@ class Transforms(TestBaseMixin):
def
test_Spectrogram_return_complex
(
self
):
def
test_Spectrogram_return_complex
(
self
):
tensor
=
torch
.
rand
((
1
,
1000
))
tensor
=
torch
.
rand
((
1
,
1000
))
self
.
_assert_consistency
(
T
.
Spectrogram
(
power
=
None
,
return_complex
=
True
),
tensor
)
self
.
_assert_consistency
(
T
.
Spectrogram
(
power
=
None
,
return_complex
=
True
),
tensor
)
@
skipIfkmeMark
def
test_InverseSpectrogram
(
self
):
def
test_InverseSpectrogram
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
...
@@ -118,6 +119,7 @@ class Transforms(TestBaseMixin):
...
@@ -118,6 +119,7 @@ class Transforms(TestBaseMixin):
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
sample_rate
)
waveform
=
common_utils
.
get_whitenoise
(
sample_rate
=
sample_rate
)
self
.
_assert_consistency
(
T
.
SpectralCentroid
(
sample_rate
=
sample_rate
),
waveform
)
self
.
_assert_consistency
(
T
.
SpectralCentroid
(
sample_rate
=
sample_rate
),
waveform
)
@
skipIfkmeMark
def
test_TimeStretch
(
self
):
def
test_TimeStretch
(
self
):
n_fft
=
1025
n_fft
=
1025
n_freq
=
n_fft
//
2
+
1
n_freq
=
n_fft
//
2
+
1
...
@@ -144,12 +146,14 @@ class Transforms(TestBaseMixin):
...
@@ -144,12 +146,14 @@ class Transforms(TestBaseMixin):
pitch_shift
(
waveform
)
pitch_shift
(
waveform
)
self
.
_assert_consistency
(
pitch_shift
,
waveform
)
self
.
_assert_consistency
(
pitch_shift
,
waveform
)
@
skipIfkmeMark
def
test_PSD
(
self
):
def
test_PSD
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
spectrogram
.
to
(
self
.
device
)
spectrogram
=
spectrogram
.
to
(
self
.
device
)
self
.
_assert_consistency_complex
(
T
.
PSD
(),
spectrogram
)
self
.
_assert_consistency_complex
(
T
.
PSD
(),
spectrogram
)
@
skipIfkmeMark
def
test_PSD_with_mask
(
self
):
def
test_PSD_with_mask
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
...
@@ -167,6 +171,7 @@ class Transforms(TestBaseMixin):
...
@@ -167,6 +171,7 @@ class Transforms(TestBaseMixin):
[
"stv_power"
,
False
],
[
"stv_power"
,
False
],
]
]
)
)
@
skipIfkmeMark
def
test_MVDR
(
self
,
solution
,
online
):
def
test_MVDR
(
self
,
solution
,
online
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
spectrogram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
...
@@ -174,6 +179,7 @@ class Transforms(TestBaseMixin):
...
@@ -174,6 +179,7 @@ class Transforms(TestBaseMixin):
mask_n
=
torch
.
rand
(
spectrogram
.
shape
[
-
2
:],
device
=
self
.
device
)
mask_n
=
torch
.
rand
(
spectrogram
.
shape
[
-
2
:],
device
=
self
.
device
)
self
.
_assert_consistency_complex
(
T
.
MVDR
(
solution
=
solution
,
online
=
online
),
spectrogram
,
mask_s
,
mask_n
)
self
.
_assert_consistency_complex
(
T
.
MVDR
(
solution
=
solution
,
online
=
online
),
spectrogram
,
mask_s
,
mask_n
)
@
skipIfkmeMark
def
test_rtf_mvdr
(
self
):
def
test_rtf_mvdr
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
specgram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
specgram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
...
@@ -182,7 +188,8 @@ class Transforms(TestBaseMixin):
...
@@ -182,7 +188,8 @@ class Transforms(TestBaseMixin):
psd_n
=
torch
.
rand
(
freq
,
channel
,
channel
,
dtype
=
self
.
complex_dtype
,
device
=
self
.
device
)
psd_n
=
torch
.
rand
(
freq
,
channel
,
channel
,
dtype
=
self
.
complex_dtype
,
device
=
self
.
device
)
reference_channel
=
0
reference_channel
=
0
self
.
_assert_consistency_complex
(
T
.
RTFMVDR
(),
specgram
,
rtf
,
psd_n
,
reference_channel
)
self
.
_assert_consistency_complex
(
T
.
RTFMVDR
(),
specgram
,
rtf
,
psd_n
,
reference_channel
)
@
skipIfkmeMark
def
test_souden_mvdr
(
self
):
def
test_souden_mvdr
(
self
):
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
tensor
=
common_utils
.
get_whitenoise
(
sample_rate
=
8000
,
n_channels
=
4
)
specgram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
specgram
=
common_utils
.
get_spectrogram
(
tensor
,
n_fft
=
400
,
hop_length
=
100
)
...
@@ -192,6 +199,85 @@ class Transforms(TestBaseMixin):
...
@@ -192,6 +199,85 @@ class Transforms(TestBaseMixin):
reference_channel
=
0
reference_channel
=
0
self
.
_assert_consistency_complex
(
T
.
SoudenMVDR
(),
specgram
,
psd_s
,
psd_n
,
reference_channel
)
self
.
_assert_consistency_complex
(
T
.
SoudenMVDR
(),
specgram
,
psd_s
,
psd_n
,
reference_channel
)
@
common_utils
.
nested_params
(
[
"Convolve"
,
"FFTConvolve"
],
[
"full"
,
"valid"
,
"same"
],
)
def
test_convolve
(
self
,
cls
,
mode
):
leading_dims
=
(
2
,
3
,
2
)
L_x
,
L_y
=
32
,
55
x
=
torch
.
rand
(
*
leading_dims
,
L_x
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
y
=
torch
.
rand
(
*
leading_dims
,
L_y
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
convolve
=
getattr
(
T
,
cls
)(
mode
=
mode
).
to
(
device
=
self
.
device
,
dtype
=
self
.
dtype
)
output
=
convolve
(
x
,
y
)
ts_output
=
torch_script
(
convolve
)(
x
,
y
)
self
.
assertEqual
(
ts_output
,
output
)
@
common_utils
.
nested_params
([
True
,
False
])
def
test_speed
(
self
,
use_lengths
):
leading_dims
=
(
3
,
2
)
time
=
200
waveform
=
torch
.
rand
(
*
leading_dims
,
time
,
dtype
=
self
.
dtype
,
device
=
self
.
device
,
requires_grad
=
True
)
if
use_lengths
:
lengths
=
torch
.
randint
(
1
,
time
,
leading_dims
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
else
:
lengths
=
None
speed
=
T
.
Speed
(
1000
,
0.9
).
to
(
self
.
device
,
self
.
dtype
)
output
=
speed
(
waveform
,
lengths
)
ts_output
=
torch_script
(
speed
)(
waveform
,
lengths
)
self
.
assertEqual
(
ts_output
,
output
)
@
common_utils
.
nested_params
([
True
,
False
])
def
test_speed_perturbation
(
self
,
use_lengths
):
leading_dims
=
(
3
,
2
)
time
=
200
waveform
=
torch
.
rand
(
*
leading_dims
,
time
,
dtype
=
self
.
dtype
,
device
=
self
.
device
,
requires_grad
=
True
)
if
use_lengths
:
lengths
=
torch
.
randint
(
1
,
time
,
leading_dims
,
dtype
=
self
.
dtype
,
device
=
self
.
device
)
else
:
lengths
=
None
speed
=
T
.
SpeedPerturbation
(
1000
,
[
0.9
]).
to
(
self
.
device
,
self
.
dtype
)
output
=
speed
(
waveform
,
lengths
)
ts_output
=
torch_script
(
speed
)(
waveform
,
lengths
)
self
.
assertEqual
(
ts_output
,
output
)
@common_utils.nested_params([True, False])
def test_add_noise(self, use_lengths):
    """TorchScripted AddNoise matches eager-mode output, with and without lengths."""
    batch_dims = (2, 3)
    num_frames = 31
    waveform = torch.rand(
        *batch_dims, num_frames, dtype=self.dtype, device=self.device, requires_grad=True
    )
    noise = torch.rand(
        *batch_dims, num_frames, dtype=self.dtype, device=self.device, requires_grad=True
    )
    if use_lengths:
        lengths = torch.rand(*batch_dims, dtype=self.dtype, device=self.device, requires_grad=True)
    else:
        lengths = None
    snr = torch.rand(*batch_dims, dtype=self.dtype, device=self.device, requires_grad=True) * 10
    transform = T.AddNoise().to(self.device, self.dtype)
    eager_out = transform(waveform, noise, snr, lengths)
    scripted_out = torch_script(transform)(waveform, noise, snr, lengths)
    self.assertEqual(scripted_out, eager_out)
def test_preemphasis(self):
    """TorchScripted Preemphasis matches eager-mode output."""
    waveform = torch.rand(3, 4, 10, dtype=self.dtype, device=self.device)
    transform = T.Preemphasis(coeff=0.97).to(dtype=self.dtype, device=self.device)
    eager_out = transform(waveform)
    scripted_out = torch_script(transform)(waveform)
    self.assertEqual(scripted_out, eager_out)
def test_deemphasis(self):
    """TorchScripted Deemphasis matches eager-mode output."""
    waveform = torch.rand(3, 4, 10, dtype=self.dtype, device=self.device)
    transform = T.Deemphasis(coeff=0.97).to(dtype=self.dtype, device=self.device)
    eager_out = transform(waveform)
    scripted_out = torch_script(transform)(waveform)
    self.assertEqual(scripted_out, eager_out)
class
TransformsFloat32Only
(
TestBaseMixin
):
class
TransformsFloat32Only
(
TestBaseMixin
):
def
test_rnnt_loss
(
self
):
def
test_rnnt_loss
(
self
):
...
...
test/torchaudio_unittest/transforms/transforms_cuda_test.py
View file @
ffeba11a
import
torch
import
torch
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
from
torchaudio_unittest.common_utils
import
PytorchTestCase
,
skipIfNoCuda
,
skipIfkmeMark
from
.transforms_test_impl
import
TransformsTestBase
from
.transforms_test_impl
import
TransformsTestBase
...
@@ -11,6 +11,7 @@ class TransformsCUDAFloat32Test(TransformsTestBase, PytorchTestCase):
...
@@ -11,6 +11,7 @@ class TransformsCUDAFloat32Test(TransformsTestBase, PytorchTestCase):
@
skipIfNoCuda
@
skipIfNoCuda
@
skipIfkmeMark
class
TransformsCUDAFloat64Test
(
TransformsTestBase
,
PytorchTestCase
):
class
TransformsCUDAFloat64Test
(
TransformsTestBase
,
PytorchTestCase
):
device
=
"cuda"
device
=
"cuda"
dtype
=
torch
.
float64
dtype
=
torch
.
float64
test/torchaudio_unittest/transforms/transforms_test.py
View file @
ffeba11a
...
@@ -25,7 +25,6 @@ class Tester(common_utils.TorchaudioTestCase):
...
@@ -25,7 +25,6 @@ class Tester(common_utils.TorchaudioTestCase):
return
waveform
/
factor
return
waveform
/
factor
def
test_mu_law_companding
(
self
):
def
test_mu_law_companding
(
self
):
quantization_channels
=
256
quantization_channels
=
256
waveform
=
self
.
waveform
.
clone
()
waveform
=
self
.
waveform
.
clone
()
...
@@ -237,7 +236,7 @@ class Tester(common_utils.TorchaudioTestCase):
...
@@ -237,7 +236,7 @@ class Tester(common_utils.TorchaudioTestCase):
torchaudio
.
transforms
.
Resample
(
sample_rate
,
upsample_rate
,
resampling_method
=
invalid_resampling_method
)
torchaudio
.
transforms
.
Resample
(
sample_rate
,
upsample_rate
,
resampling_method
=
invalid_resampling_method
)
upsample_resample
=
torchaudio
.
transforms
.
Resample
(
upsample_resample
=
torchaudio
.
transforms
.
Resample
(
sample_rate
,
upsample_rate
,
resampling_method
=
"sinc_interp
olatio
n"
sample_rate
,
upsample_rate
,
resampling_method
=
"sinc_interp
_han
n"
)
)
up_sampled
=
upsample_resample
(
waveform
)
up_sampled
=
upsample_resample
(
waveform
)
...
@@ -245,7 +244,7 @@ class Tester(common_utils.TorchaudioTestCase):
...
@@ -245,7 +244,7 @@ class Tester(common_utils.TorchaudioTestCase):
self
.
assertTrue
(
up_sampled
.
size
(
-
1
)
==
waveform
.
size
(
-
1
)
*
2
)
self
.
assertTrue
(
up_sampled
.
size
(
-
1
)
==
waveform
.
size
(
-
1
)
*
2
)
downsample_resample
=
torchaudio
.
transforms
.
Resample
(
downsample_resample
=
torchaudio
.
transforms
.
Resample
(
sample_rate
,
downsample_rate
,
resampling_method
=
"sinc_interp
olatio
n"
sample_rate
,
downsample_rate
,
resampling_method
=
"sinc_interp
_han
n"
)
)
down_sampled
=
downsample_resample
(
waveform
)
down_sampled
=
downsample_resample
(
waveform
)
...
@@ -292,8 +291,7 @@ class SmokeTest(common_utils.TorchaudioTestCase):
...
@@ -292,8 +291,7 @@ class SmokeTest(common_utils.TorchaudioTestCase):
self
.
assertEqual
(
specgram
.
onesided
,
False
)
self
.
assertEqual
(
specgram
.
onesided
,
False
)
def
test_melspectrogram
(
self
):
def
test_melspectrogram
(
self
):
melspecgram
=
transforms
.
MelSpectrogram
(
center
=
True
,
pad_mode
=
"reflect"
,
onesided
=
False
)
melspecgram
=
transforms
.
MelSpectrogram
(
center
=
True
,
pad_mode
=
"reflect"
)
specgram
=
melspecgram
.
spectrogram
specgram
=
melspecgram
.
spectrogram
self
.
assertEqual
(
specgram
.
center
,
True
)
self
.
assertEqual
(
specgram
.
center
,
True
)
self
.
assertEqual
(
specgram
.
pad_mode
,
"reflect"
)
self
.
assertEqual
(
specgram
.
pad_mode
,
"reflect"
)
self
.
assertEqual
(
specgram
.
onesided
,
False
)
test/torchaudio_unittest/transforms/transforms_test_impl.py
View file @
ffeba11a
import
math
import
random
from
unittest.mock
import
patch
import
numpy
as
np
import
torch
import
torch
import
torchaudio.transforms
as
T
import
torchaudio.transforms
as
T
from
parameterized
import
param
,
parameterized
from
parameterized
import
param
,
parameterized
from
scipy
import
signal
from
torchaudio.functional
import
lfilter
,
preemphasis
from
torchaudio.functional.functional
import
_get_sinc_resample_kernel
from
torchaudio.functional.functional
import
_get_sinc_resample_kernel
from
torchaudio_unittest.common_utils
import
get_spectrogram
,
get_whitenoise
,
nested_params
,
TestBaseMixin
from
torchaudio_unittest.common_utils
import
get_spectrogram
,
get_whitenoise
,
nested_params
,
TestBaseMixin
from
torchaudio_unittest.common_utils.psd_utils
import
psd_numpy
from
torchaudio_unittest.common_utils.psd_utils
import
psd_numpy
...
@@ -11,11 +18,11 @@ def _get_ratio(mat):
...
@@ -11,11 +18,11 @@ def _get_ratio(mat):
class
TransformsTestBase
(
TestBaseMixin
):
class
TransformsTestBase
(
TestBaseMixin
):
def
test_
I
nverse
M
el
S
cale
(
self
):
def
test_
i
nverse
_m
el
s
cale
(
self
):
"""Gauge the quality of InverseMelScale transform.
"""Gauge the quality of InverseMelScale transform.
As InverseMelScale is currently implemented with
As InverseMelScale is currently implemented with
random initialization + iterative optimization,
sub-optimal solution (compute matrix inverse + relu),
it is not practically possible to assert the difference between
it is not practically possible to assert the difference between
the estimated spectrogram and the original spectrogram as a whole.
the estimated spectrogram and the original spectrogram as a whole.
The estimated spectrogram has very large local discrepancies.
The estimated spectrogram has very large local discrepancies.
...
@@ -53,7 +60,7 @@ class TransformsTestBase(TestBaseMixin):
...
@@ -53,7 +60,7 @@ class TransformsTestBase(TestBaseMixin):
assert
_get_ratio
(
relative_diff
<
1e-5
)
>
1e-5
assert
_get_ratio
(
relative_diff
<
1e-5
)
>
1e-5
@
nested_params
(
@
nested_params
(
[
"sinc_interp
olation"
,
"kaiser_window
"
],
[
"sinc_interp
_hann"
,
"sinc_interp_kaiser
"
],
[
16000
,
44100
],
[
16000
,
44100
],
)
)
def
test_resample_identity
(
self
,
resampling_method
,
sample_rate
):
def
test_resample_identity
(
self
,
resampling_method
,
sample_rate
):
...
@@ -65,7 +72,7 @@ class TransformsTestBase(TestBaseMixin):
...
@@ -65,7 +72,7 @@ class TransformsTestBase(TestBaseMixin):
self
.
assertEqual
(
waveform
,
resampled
)
self
.
assertEqual
(
waveform
,
resampled
)
@
nested_params
(
@
nested_params
(
[
"sinc_interp
olation"
,
"kaiser_window
"
],
[
"sinc_interp
_hann"
,
"sinc_interp_kaiser
"
],
[
None
,
torch
.
float64
],
[
None
,
torch
.
float64
],
)
)
def
test_resample_cache_dtype
(
self
,
resampling_method
,
dtype
):
def
test_resample_cache_dtype
(
self
,
resampling_method
,
dtype
):
...
@@ -158,3 +165,316 @@ class TransformsTestBase(TestBaseMixin):
...
@@ -158,3 +165,316 @@ class TransformsTestBase(TestBaseMixin):
trans
.
orig_freq
,
sample_rate
,
trans
.
gcd
,
device
=
self
.
device
,
dtype
=
self
.
dtype
trans
.
orig_freq
,
sample_rate
,
trans
.
gcd
,
device
=
self
.
device
,
dtype
=
self
.
dtype
)
)
self
.
assertEqual
(
trans
.
kernel
,
expected
)
self
.
assertEqual
(
trans
.
kernel
,
expected
)
@nested_params(
    [(10, 4), (4, 3, 1, 2), (2,), ()],
    [(100, 43), (21, 45)],
    ["full", "valid", "same"],
)
def test_convolve(self, leading_dims, lengths, mode):
    """Check that Convolve returns values identical to those that SciPy produces."""
    len_x, len_y = lengths
    x = torch.rand(*(leading_dims + (len_x,)), dtype=self.dtype, device=self.device)
    y = torch.rand(*(leading_dims + (len_y,)), dtype=self.dtype, device=self.device)
    transform = T.Convolve(mode=mode).to(self.device)
    actual = transform(x, y)
    # Flatten the leading dimensions and run SciPy's convolution per 1-D pair.
    num_signals = torch.tensor(leading_dims).prod() if leading_dims else 1
    flat_x = x.reshape((num_signals, len_x))
    flat_y = y.reshape((num_signals, len_y))
    reference = [
        signal.convolve(
            flat_x[i].detach().cpu().numpy(), flat_y[i].detach().cpu().numpy(), mode=mode
        )
        for i in range(num_signals)
    ]
    expected = torch.tensor(np.array(reference)).reshape(leading_dims + (-1,))
    self.assertEqual(expected, actual)
@nested_params(
    [(10, 4), (4, 3, 1, 2), (2,), ()],
    [(100, 43), (21, 45)],
    ["full", "valid", "same"],
)
def test_fftconvolve(self, leading_dims, lengths, mode):
    """Check that FFTConvolve returns values identical to those that SciPy produces."""
    len_x, len_y = lengths
    x = torch.rand(*(leading_dims + (len_x,)), dtype=self.dtype, device=self.device)
    y = torch.rand(*(leading_dims + (len_y,)), dtype=self.dtype, device=self.device)
    transform = T.FFTConvolve(mode=mode).to(self.device)
    actual = transform(x, y)
    # scipy.signal.fftconvolve broadcasts over leading axes when axes=-1.
    reference = signal.fftconvolve(
        x.detach().cpu().numpy(), y.detach().cpu().numpy(), axes=-1, mode=mode
    )
    expected = torch.tensor(reference)
    self.assertEqual(expected, actual)
def test_speed_identity(self):
    """speed of 1.0 does not alter input waveform and length"""
    batch_dims = (5, 4, 2)
    num_frames = 1000
    waveform = torch.rand(*batch_dims, num_frames)
    lengths = torch.randint(1, 1000, batch_dims)
    transform = T.Speed(1000, 1.0)
    out_waveform, out_lengths = transform(waveform, lengths)
    self.assertEqual(waveform, out_waveform)
    self.assertEqual(lengths, out_lengths)
@nested_params([0.8, 1.1, 1.2], [True, False])
def test_speed_accuracy(self, factor, use_lengths):
    """sinusoidal waveform is properly compressed by factor"""
    n_to_trim = 20  # ignore resampling edge effects when comparing
    sample_rate = 1000
    freq = 2
    t = torch.arange(0, 5, 1.0 / sample_rate)
    waveform = torch.cos(2 * math.pi * freq * t).unsqueeze(0).to(self.device, self.dtype)
    lengths = torch.tensor([waveform.size(1)]) if use_lengths else None
    transform = T.Speed(sample_rate, factor).to(self.device, self.dtype)
    output, output_lengths = transform(waveform, lengths)
    if use_lengths:
        self.assertEqual(output.size(1), output_lengths[0])
    else:
        self.assertEqual(None, output_lengths)
    # A speed change by `factor` is equivalent to a cosine of frequency freq*factor
    # over a duration shortened by the same factor.
    t_scaled = torch.arange(0, 5 / factor, 1.0 / sample_rate)
    reference = (
        torch.cos(2 * math.pi * freq * factor * t_scaled).unsqueeze(0).to(self.device, self.dtype)
    )
    self.assertEqual(
        reference[..., n_to_trim:-n_to_trim],
        output[..., n_to_trim:-n_to_trim],
        atol=1e-1,
        rtol=1e-4,
    )
def test_speed_perturbation(self):
    """sinusoidal waveform is properly compressed by sampled factors"""
    n_to_trim = 20  # ignore resampling edge effects when comparing
    sample_rate = 1000
    freq = 2
    t = torch.arange(0, 5, 1.0 / sample_rate)
    waveform = torch.cos(2 * math.pi * freq * t).unsqueeze(0).to(self.device, self.dtype)
    lengths = torch.tensor([waveform.size(1)])
    factors = [0.8, 1.1, 1.0]
    indices = random.choices(range(len(factors)), k=5)
    transform = T.SpeedPerturbation(sample_rate, factors).to(self.device, self.dtype)
    # Pin the randomly sampled factor index so each iteration is predictable.
    with patch("torch.randint", side_effect=indices):
        for idx in indices:
            output, output_lengths = transform(waveform, lengths)
            self.assertEqual(output.size(1), output_lengths[0])
            factor = factors[idx]
            t_scaled = torch.arange(0, 5 / factor, 1.0 / sample_rate)
            reference = (
                torch.cos(2 * math.pi * freq * factor * t_scaled)
                .unsqueeze(0)
                .to(self.device, self.dtype)
            )
            self.assertEqual(
                reference[..., n_to_trim:-n_to_trim],
                output[..., n_to_trim:-n_to_trim],
                atol=1e-1,
                rtol=1e-4,
            )
def test_add_noise_broadcast(self):
    """Check that AddNoise produces correct outputs when broadcasting input dimensions."""
    leading_dims = (5, 2, 3)
    num_frames = 51
    waveform = torch.rand(*leading_dims, num_frames, dtype=self.dtype, device=self.device)
    noise = torch.rand(5, 1, 1, num_frames, dtype=self.dtype, device=self.device)
    lengths = torch.rand(5, 1, 3, dtype=self.dtype, device=self.device)
    snr = torch.rand(1, 1, 1, dtype=self.dtype, device=self.device) * 10
    transform = T.AddNoise()
    actual = transform(waveform, noise, snr, lengths)
    # Broadcasting must be equivalent to explicitly expanding every input.
    expected = transform(
        waveform,
        noise.expand(*leading_dims, num_frames),
        snr.expand(*leading_dims),
        lengths.expand(*leading_dims),
    )
    self.assertEqual(expected, actual)
@parameterized.expand(
    [
        ((5, 2, 3), (2, 1, 1), (5, 2), (5, 2, 3)),
        ((2, 1), (5,), (5,), (5,)),
        ((3,), (5, 2, 3), (2, 1, 1), (5, 2)),
    ]
)
def test_add_noise_leading_dim_check(self, waveform_dims, noise_dims, lengths_dims, snr_dims):
    """Check that AddNoise properly rejects inputs with different leading dimension lengths."""
    num_frames = 51
    waveform = torch.rand(*waveform_dims, num_frames, dtype=self.dtype, device=self.device)
    noise = torch.rand(*noise_dims, num_frames, dtype=self.dtype, device=self.device)
    lengths = torch.rand(*lengths_dims, dtype=self.dtype, device=self.device)
    snr = torch.rand(*snr_dims, dtype=self.dtype, device=self.device) * 10
    transform = T.AddNoise()
    with self.assertRaisesRegex(ValueError, "Input leading dimensions"):
        transform(waveform, noise, snr, lengths)
def test_add_noise_length_check(self):
    """Check that add_noise properly rejects inputs that have inconsistent length dimensions."""
    leading_dims = (5, 2, 3)
    num_frames = 51
    waveform = torch.rand(*leading_dims, num_frames, dtype=self.dtype, device=self.device)
    # Noise is one sample shorter than the waveform, which must be rejected.
    noise = torch.rand(*leading_dims, 50, dtype=self.dtype, device=self.device)
    lengths = torch.rand(*leading_dims, dtype=self.dtype, device=self.device)
    snr = torch.rand(*leading_dims, dtype=self.dtype, device=self.device) * 10
    transform = T.AddNoise()
    with self.assertRaisesRegex(ValueError, "Length dimensions"):
        transform(waveform, noise, snr, lengths)
@nested_params(
    [(2, 1, 31)],
    [0.97, 0.72],
)
def test_preemphasis(self, input_shape, coeff):
    """Preemphasis is equivalent to an FIR filter with taps [1, -coeff]."""
    waveform = torch.rand(*input_shape, dtype=self.dtype, device=self.device)
    transform = T.Preemphasis(coeff=coeff).to(dtype=self.dtype, device=self.device)
    actual = transform(waveform)
    denom = torch.tensor([1.0, 0.0], device=self.device, dtype=self.dtype)
    numer = torch.tensor([1.0, -coeff], device=self.device, dtype=self.dtype)
    expected = lfilter(waveform, a_coeffs=denom, b_coeffs=numer)
    self.assertEqual(actual, expected)
@nested_params(
    [(2, 1, 31)],
    [0.97, 0.72],
)
def test_deemphasis(self, input_shape, coeff):
    """Deemphasis inverts preemphasis applied with the same coefficient."""
    waveform = torch.rand(*input_shape, dtype=self.dtype, device=self.device)
    emphasized = preemphasis(waveform, coeff=coeff)
    transform = T.Deemphasis(coeff=coeff).to(dtype=self.dtype, device=self.device)
    restored = transform(emphasized)
    self.assertEqual(restored, waveform)
@nested_params(
    [(100, 200), (5, 10, 20), (50, 50, 100, 200)],
)
def test_time_masking(self, input_shape):
    """TimeMasking zeroes a contiguous band along the last (time) axis only."""
    transform = T.TimeMasking(time_mask_param=5)
    # Generate a specgram tensor containing 1's only, for the ease of testing.
    specgram = torch.ones(*input_shape)
    masked = transform(specgram)
    ndim = len(input_shape)
    # Across the masked (last) axis the mean is uniform and strictly in (0, 1).
    time_mean = torch.mean(masked, ndim - 1)
    self.assertEqual(torch.var(time_mean), 0)
    self.assertTrue(torch.mean(time_mean) > 0)
    self.assertTrue(torch.mean(time_mean) < 1)
    # Across all other axes, the mean tensor contains at least one zero element
    # and every non-zero element equals 1.
    for axis in range(ndim - 1):
        other_mean = torch.mean(masked, axis)
        self.assertTrue(0 in other_mean)
        self.assertFalse(False in torch.eq(other_mean[other_mean != 0], 1))
@nested_params(
    [(100, 200), (5, 10, 20), (50, 50, 100, 200)],
)
def test_freq_masking(self, input_shape):
    """FrequencyMasking zeroes a contiguous band along the frequency (dim-2) axis only."""
    transform = T.FrequencyMasking(freq_mask_param=5)
    # Generate a specgram tensor containing 1's only, for the ease of testing.
    specgram = torch.ones(*input_shape)
    masked = transform(specgram)
    ndim = len(input_shape)
    # Across the masked (dim-2) axis the mean is uniform and strictly in (0, 1).
    freq_mean = torch.mean(masked, ndim - 2)
    self.assertEqual(torch.var(freq_mean), 0)
    self.assertTrue(torch.mean(freq_mean) > 0)
    self.assertTrue(torch.mean(freq_mean) < 1)
    # Across all other axes, the mean tensor contains at least one zero element
    # and every non-zero element equals 1.
    for axis in range(ndim):
        if axis != ndim - 2:
            other_mean = torch.mean(masked, axis)
            self.assertTrue(0 in other_mean)
            self.assertFalse(False in torch.eq(other_mean[other_mean != 0], 1))
@parameterized.expand(
    [
        param(10, 20, 10, 20, False),
        param(0, 20, 10, 20, False),
        param(10, 20, 0, 20, False),
        param(10, 20, 10, 20, True),
        param(0, 20, 10, 20, True),
        param(10, 20, 0, 20, True),
    ]
)
def test_specaugment(self, n_time_masks, time_mask_param, n_freq_masks, freq_mask_param, iid_masks):
    """Make sure SpecAug masking works as expected.

    Masks a batch of all-ones spectrograms and verifies, via per-axis means,
    that masking is applied along exactly the configured axes, and that
    ``iid_masks`` controls whether the two spectrograms in the batch receive
    distinct masks.
    """
    spec = torch.ones(2, 200, 200)
    transform = T.SpecAugment(
        n_time_masks=n_time_masks,
        time_mask_param=time_mask_param,
        n_freq_masks=n_freq_masks,
        freq_mask_param=freq_mask_param,
        iid_masks=iid_masks,
        zero_masking=True,
    )
    spec_masked = transform(spec)
    f_axis_mean = torch.mean(spec_masked, 1)
    t_axis_mean = torch.mean(spec_masked, 2)
    if n_time_masks == 0 and n_freq_masks == 0:
        # No masking requested: the input must pass through unchanged.
        self.assertEqual(spec, spec_masked)
    elif n_time_masks > 0 and n_freq_masks > 0:
        # Across both time and frequency dimensions, the mean tensor should contain
        # at least one zero element, and all non-zero elements should be less than 1.
        self.assertTrue(0 in t_axis_mean)
        self.assertFalse(False in torch.lt(t_axis_mean[t_axis_mean != 0], 1))
        self.assertTrue(0 in f_axis_mean)
        self.assertFalse(False in torch.lt(f_axis_mean[f_axis_mean != 0], 1))
    elif n_freq_masks > 0:
        # Across the frequency axis where we apply masking,
        # the mean tensor should contain equal elements,
        # and the value should be between 0 and 1.
        self.assertFalse(False in torch.eq(f_axis_mean[0], f_axis_mean[0][0]))
        self.assertFalse(False in torch.eq(f_axis_mean[1], f_axis_mean[1][0]))
        self.assertTrue(f_axis_mean[0][0] < 1)
        self.assertTrue(f_axis_mean[1][0] > 0)
        # Across the time axis where we don't mask, the mean tensor should contain at
        # least one zero element, and all non-zero elements should be 1.
        self.assertTrue(0 in t_axis_mean)
        self.assertFalse(False in torch.eq(t_axis_mean[t_axis_mean != 0], 1))
    else:
        # Across the time axis where we apply masking,
        # the mean tensor should contain equal elements,
        # and the value should be between 0 and 1.
        self.assertFalse(False in torch.eq(t_axis_mean[0], t_axis_mean[0][0]))
        self.assertFalse(False in torch.eq(t_axis_mean[1], t_axis_mean[1][0]))
        self.assertTrue(t_axis_mean[0][0] < 1)
        self.assertTrue(t_axis_mean[1][0] > 0)
        # Across the frequency axis where we don't mask, the mean tensor should contain at
        # least one zero element, and all non-zero elements should be 1.
        self.assertTrue(0 in f_axis_mean)
        self.assertFalse(False in torch.eq(f_axis_mean[f_axis_mean != 0], 1))
    # Test if iid_masks gives different masking results for different spectrograms
    # across the 0th dimension. (Leftover debug `print(diff)` removed.)
    diff = torch.linalg.vector_norm(spec_masked[0] - spec_masked[1]).item()
    if iid_masks:
        self.assertTrue(diff > 0)
    else:
        self.assertTrue(diff == 0)
test/torchaudio_unittest/utils/ffmpeg_utils_test.py
View file @
ffeba11a
...
@@ -25,3 +25,17 @@ class TestFFmpegUtils(PytorchTestCase):
...
@@ -25,3 +25,17 @@ class TestFFmpegUtils(PytorchTestCase):
"""`get_versions` does not crash"""
"""`get_versions` does not crash"""
versions
=
ffmpeg_utils
.
get_versions
()
versions
=
ffmpeg_utils
.
get_versions
()
assert
set
(
versions
.
keys
())
==
{
"libavutil"
,
"libavcodec"
,
"libavformat"
,
"libavfilter"
,
"libavdevice"
}
assert
set
(
versions
.
keys
())
==
{
"libavutil"
,
"libavcodec"
,
"libavformat"
,
"libavfilter"
,
"libavdevice"
}
def test_available_stuff(self):
    """get_encoders|decoders|muxers|demuxers|devices function does not segfault"""
    for accessor in (
        ffmpeg_utils.get_demuxers,
        ffmpeg_utils.get_muxers,
        ffmpeg_utils.get_audio_decoders,
        ffmpeg_utils.get_audio_encoders,
        ffmpeg_utils.get_video_decoders,
        ffmpeg_utils.get_video_encoders,
        ffmpeg_utils.get_input_devices,
        ffmpeg_utils.get_output_devices,
        ffmpeg_utils.get_input_protocols,
        ffmpeg_utils.get_output_protocols,
    ):
        accessor()
third_party/LICENSES_BUNDLED.txt
0 → 100644
View file @
ffeba11a
The Torchaudio repository and source distributions bundle several libraries that are
compatibly licensed. We list some here.
Name: cuctc
License: BSD-2-Clause (Files without specific notes)
BSD-3-Clause File:
torchaudio/csrc/cuctc/src/ctc_fast_divmod.cuh,
Apache 2.0 Files:
torchaudio/csrc/cuctc/src/bitonic_topk
For details, see: cuctc/LICENSE,
torchaudio/csrc/cuctc/src/bitonic_topk/LICENSE
third_party/ffmpeg/multi/CMakeLists.txt
0 → 100644
View file @
ffeba11a
################################################################################
# This file defines the following FFmpeg libraries using pre-built binaries.
add_library
(
ffmpeg4 INTERFACE
)
add_library
(
ffmpeg5 INTERFACE
)
add_library
(
ffmpeg6 INTERFACE
)
################################################################################
include
(
FetchContent
)
set
(
base_url https://pytorch.s3.amazonaws.com/torchaudio/ffmpeg
)
if
(
APPLE
)
if
(
"
${
CMAKE_SYSTEM_PROCESSOR
}
"
STREQUAL
"arm64"
)
FetchContent_Declare
(
f4
URL
${
base_url
}
/2023-08-14/macos_arm64/4.4.4.tar.gz
URL_HASH SHA256=9a593eb241eb8b23bc557856ee6db5d9aecd2d8895c614a949f3a1ad9799c1a1
)
FetchContent_Declare
(
f5
URL
${
base_url
}
/2023-07-06/macos_arm64/5.0.3.tar.gz
URL_HASH SHA256=316fe8378afadcf63089acf3ad53a626fd3c26cc558b96ce1dc94d2a78f4deb4
)
FetchContent_Declare
(
f6
URL
${
base_url
}
/2023-07-06/macos_arm64/6.0.tar.gz
URL_HASH SHA256=5d1da9626f8cb817d6c558a2c61085a3d39a8d9f725a6f69f4658bea8efa9389
)
elseif
(
"
${
CMAKE_SYSTEM_PROCESSOR
}
"
STREQUAL
"x86_64"
)
FetchContent_Declare
(
f4
URL
${
base_url
}
/2023-08-14/macos_x86_64/4.4.4.tar.gz
URL_HASH SHA256=0935e359c0864969987d908397f9208d6dc4dc0ef8bfe2ec730bb2c44eae89fc
)
FetchContent_Declare
(
f5
URL
${
base_url
}
/2023-07-06/macos_x86_64/5.0.3.tar.gz
URL_HASH SHA256=d0b49575d3b174cfcca53b3049641855e48028cf22dd32f3334bbec4ca94f43e
)
FetchContent_Declare
(
f6
URL
${
base_url
}
/2023-07-06/macos_x86_64/6.0.tar.gz
URL_HASH SHA256=eabc01eb7d9e714e484d5e1b27bf7d921e87c1f3c00334abd1729e158d6db862
)
else
()
message
(
FATAL_ERROR
"
${
CMAKE_SYSTEM_PROCESSOR
}
is not supported for FFmpeg multi-version integration. "
"If you have FFmpeg libraries installed in the system,"
" setting FFMPEG_ROOT environment variable to the root directory of FFmpeg installation"
" (the directory where `include` and `lib` sub directories with corresponding headers"
" and library files are present) will invoke the FFmpeg single-version integration. "
"If you do not need the FFmpeg integration, setting USE_FFMPEG=0 will bypass the issue."
)
endif
()
elseif
(
UNIX
)
if
(
"
${
CMAKE_SYSTEM_PROCESSOR
}
"
STREQUAL
"aarch64"
)
FetchContent_Declare
(
f4
URL
${
base_url
}
/2023-08-14/linux_aarch64/4.4.4.tar.gz
URL_HASH SHA256=6f00437d13a3b3812ebe81c6e6f3a84a58f260d946a1995df87ba09aae234504
)
FetchContent_Declare
(
f5
URL
${
base_url
}
/2023-07-06/linux_aarch64/5.0.3.tar.gz
URL_HASH SHA256=65c663206982ee3f0ff88436d8869d191b46061e01e753518c77ecc13ea0236d
)
FetchContent_Declare
(
f6
URL
${
base_url
}
/2023-07-06/linux_aarch64/6.0.tar.gz
URL_HASH SHA256=ec762fd41ea7b8d9ad4f810f53fd78a565f2bc6f680afe56d555c80f3d35adef
)
elseif
(
"
${
CMAKE_SYSTEM_PROCESSOR
}
"
STREQUAL
"x86_64"
)
FetchContent_Declare
(
f4
URL
${
base_url
}
/2023-08-14/linux_x86_64/4.4.4.tar.gz
URL_HASH SHA256=9b87eeba9b6975e25f28ba12163bd713228ed84f4c2b3721bc5ebe92055edb51
)
FetchContent_Declare
(
f5
URL
${
base_url
}
/2023-07-06/linux_x86_64/5.0.3.tar.gz
URL_HASH SHA256=de3c75c99b9ce33de7efdc144566804ae5880457ce71e185b3f592dc452edce7
)
FetchContent_Declare
(
f6
URL
${
base_url
}
/2023-07-06/linux_x86_64/6.0.tar.gz
URL_HASH SHA256=04d3916404bab5efadd29f68361b7d13ea71e6242c6473edcb747a41a9fb97a6
)
else
()
# Possible case ppc64le (though it's not officially supported.)
message
(
FATAL_ERROR
"
${
CMAKE_SYSTEM_PROCESSOR
}
is not supported for FFmpeg multi-version integration. "
"If you have FFmpeg libraries installed in the system,"
" setting FFMPEG_ROOT environment variable to the root directory of FFmpeg installation"
" (the directory where `include` and `lib` sub directories with corresponding headers"
" and library files are present) will invoke the FFmpeg single-version integration. "
"If you do not need the FFmpeg integration, setting USE_FFMPEG=0 will bypass the issue."
)
endif
()
elseif
(
MSVC
)
FetchContent_Declare
(
f4
URL
${
base_url
}
/2023-08-14/windows/4.4.4.tar.gz
URL_HASH SHA256=9f9a65cf03a3e164edca601ba18180a504e44e03fae48ce706ca3120b55a4db5
)
FetchContent_Declare
(
f5
URL
${
base_url
}
/2023-07-06/windows/5.0.3.tar.gz
URL_HASH SHA256=e2daa10799909e366cb1b4b91a217d35f6749290dcfeea40ecae3d5b05a46cb3
)
FetchContent_Declare
(
f6
URL
${
base_url
}
/2023-07-06/windows/6.0.tar.gz
URL_HASH SHA256=098347eca8cddb5aaa61e9ecc1a00548c645fc59b4f7346b3d91414aa00a9cf6
)
endif
()
FetchContent_MakeAvailable
(
f4 f5 f6
)
target_include_directories
(
ffmpeg4 INTERFACE
${
f4_SOURCE_DIR
}
/include
)
target_include_directories
(
ffmpeg5 INTERFACE
${
f5_SOURCE_DIR
}
/include
)
target_include_directories
(
ffmpeg6 INTERFACE
${
f6_SOURCE_DIR
}
/include
)
if
(
APPLE
)
target_link_libraries
(
ffmpeg4
INTERFACE
${
f4_SOURCE_DIR
}
/lib/libavutil.56.dylib
${
f4_SOURCE_DIR
}
/lib/libavcodec.58.dylib
${
f4_SOURCE_DIR
}
/lib/libavformat.58.dylib
${
f4_SOURCE_DIR
}
/lib/libavdevice.58.dylib
${
f4_SOURCE_DIR
}
/lib/libavfilter.7.dylib
)
target_link_libraries
(
ffmpeg5
INTERFACE
${
f5_SOURCE_DIR
}
/lib/libavutil.57.dylib
${
f5_SOURCE_DIR
}
/lib/libavcodec.59.dylib
${
f5_SOURCE_DIR
}
/lib/libavformat.59.dylib
${
f5_SOURCE_DIR
}
/lib/libavdevice.59.dylib
${
f5_SOURCE_DIR
}
/lib/libavfilter.8.dylib
)
target_link_libraries
(
ffmpeg6
INTERFACE
${
f6_SOURCE_DIR
}
/lib/libavutil.58.dylib
${
f6_SOURCE_DIR
}
/lib/libavcodec.60.dylib
${
f6_SOURCE_DIR
}
/lib/libavformat.60.dylib
${
f6_SOURCE_DIR
}
/lib/libavdevice.60.dylib
${
f6_SOURCE_DIR
}
/lib/libavfilter.9.dylib
)
elseif
(
UNIX
)
target_link_libraries
(
ffmpeg4
INTERFACE
${
f4_SOURCE_DIR
}
/lib/libavutil.so.56
${
f4_SOURCE_DIR
}
/lib/libavcodec.so.58
${
f4_SOURCE_DIR
}
/lib/libavformat.so.58
${
f4_SOURCE_DIR
}
/lib/libavdevice.so.58
${
f4_SOURCE_DIR
}
/lib/libavfilter.so.7
)
target_link_libraries
(
ffmpeg5
INTERFACE
${
f5_SOURCE_DIR
}
/lib/libavutil.so.57
${
f5_SOURCE_DIR
}
/lib/libavcodec.so.59
${
f5_SOURCE_DIR
}
/lib/libavformat.so.59
${
f5_SOURCE_DIR
}
/lib/libavdevice.so.59
${
f5_SOURCE_DIR
}
/lib/libavfilter.so.8
)
target_link_libraries
(
ffmpeg6
INTERFACE
${
f6_SOURCE_DIR
}
/lib/libavutil.so.58
${
f6_SOURCE_DIR
}
/lib/libavcodec.so.60
${
f6_SOURCE_DIR
}
/lib/libavformat.so.60
${
f6_SOURCE_DIR
}
/lib/libavdevice.so.60
${
f6_SOURCE_DIR
}
/lib/libavfilter.so.9
)
elseif
(
MSVC
)
target_link_libraries
(
ffmpeg4
INTERFACE
${
f4_SOURCE_DIR
}
/bin/avutil.lib
${
f4_SOURCE_DIR
}
/bin/avcodec.lib
${
f4_SOURCE_DIR
}
/bin/avformat.lib
${
f4_SOURCE_DIR
}
/bin/avdevice.lib
${
f4_SOURCE_DIR
}
/bin/avfilter.lib
)
target_link_libraries
(
ffmpeg5
INTERFACE
${
f5_SOURCE_DIR
}
/bin/avutil.lib
${
f5_SOURCE_DIR
}
/bin/avcodec.lib
${
f5_SOURCE_DIR
}
/bin/avformat.lib
${
f5_SOURCE_DIR
}
/bin/avdevice.lib
${
f5_SOURCE_DIR
}
/bin/avfilter.lib
)
target_link_libraries
(
ffmpeg6
INTERFACE
${
f6_SOURCE_DIR
}
/bin/avutil.lib
${
f6_SOURCE_DIR
}
/bin/avcodec.lib
${
f6_SOURCE_DIR
}
/bin/avformat.lib
${
f6_SOURCE_DIR
}
/bin/avdevice.lib
${
f6_SOURCE_DIR
}
/bin/avfilter.lib
)
endif
()
third_party/ffmpeg/single/CMakeLists.txt
0 → 100644
View file @
ffeba11a
# CMake file for searching existing FFmpeg installation and defining ffmpeg TARGET
message(STATUS "Searching existing FFmpeg installation")
message(STATUS FFMPEG_ROOT=$ENV{FFMPEG_ROOT})

# Everything below depends on FFMPEG_ROOT, so fail early when it is missing.
if(NOT DEFINED ENV{FFMPEG_ROOT})
  message(FATAL_ERROR "Environment variable FFMPEG_ROOT is not set.")
endif()

set(_root $ENV{FFMPEG_ROOT})
# Search both lib/ and bin/ — presumably to cover Unix and Windows layouts (TODO confirm).
set(lib_dirs "${_root}/lib" "${_root}/bin")
set(include_dir "${_root}/include")

# Interface target that aggregates the include path and located libraries.
add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${include_dir}")

# Locate one FFmpeg component (its header and library) under FFMPEG_ROOT and
# attach the library to the `ffmpeg` interface target.
# REQUIRED + NO_DEFAULT_PATH: configuration aborts unless the component is
# found under FFMPEG_ROOT itself (system paths are not searched).
function(_find_ffmpeg_lib component)
  find_path("${component}_header"
    NAMES "lib${component}/${component}.h"
    PATHS "${include_dir}"
    DOC "The include directory for ${component}"
    REQUIRED
    NO_DEFAULT_PATH)
  find_library("lib${component}"
    NAMES "${component}"
    PATHS ${lib_dirs}
    DOC "${component} library"
    REQUIRED
    NO_DEFAULT_PATH)
  message(STATUS "Found ${component}: ${lib${component}}")
  target_link_libraries(ffmpeg INTERFACE ${lib${component}})
endfunction()

_find_ffmpeg_lib(avutil)
_find_ffmpeg_lib(avcodec)
_find_ffmpeg_lib(avformat)
_find_ffmpeg_lib(avdevice)
_find_ffmpeg_lib(avfilter)
third_party/sox/CMakeLists.txt
View file @
ffeba11a
f
in
d_package
(
PkgConfig REQUIRED
)
in
clude
(
FetchContent
)
include
(
ExternalProject
)
FetchContent_Declare
(
sox_src
set
(
INSTALL_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
/../install
)
set
(
ARCHIVE_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
/../archives
)
set
(
patch_dir
${
PROJECT_SOURCE_DIR
}
/third_party/patches
)
set
(
COMMON_ARGS --quiet --disable-shared --enable-static --prefix=
${
INSTALL_DIR
}
--with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc
)
# To pass custom environment variables to ExternalProject_Add command,
# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMANAD>`.
# https://stackoverflow.com/a/62437353
# We constrcut the custom environment variables here
set
(
envs
"PKG_CONFIG_PATH=
${
INSTALL_DIR
}
/lib/pkgconfig"
"LDFLAGS=-L
${
INSTALL_DIR
}
/lib $ENV{LDFLAGS}"
"CFLAGS=-I
${
INSTALL_DIR
}
/include -fvisibility=hidden $ENV{CFLAGS}"
)
ExternalProject_Add
(
amr
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz
URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/amr/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/amr/configure
${
COMMON_ARGS
}
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
lame
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz
URL_HASH SHA256=24346b4158e4af3bd9f2e194bb23eb473c75fb7377011523353196b19b9a23ff
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/lame/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/lame/configure
${
COMMON_ARGS
}
--enable-nasm
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
ogg
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://ftp.osuosl.org/pub/xiph/releases/ogg/libogg-1.3.3.tar.gz
URL_HASH SHA256=c2e8a485110b97550f453226ec644ebac6cb29d1caef2902c007edab4308d985
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/ogg/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/ogg/configure
${
COMMON_ARGS
}
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
flac
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS ogg
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz
URL_HASH SHA256=91cfc3ed61dc40f47f050a109b08610667d73477af6ef36dcad31c31a4a8d53f
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/flac/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/flac/configure
${
COMMON_ARGS
}
--with-ogg --disable-cpplibs
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
vorbis
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS ogg
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.6.tar.gz
URL_HASH SHA256=6ed40e0241089a42c48604dc00e362beee00036af2d8b3f46338031c9e0351cb
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/vorbis/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/vorbis/configure
${
COMMON_ARGS
}
--with-ogg
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
opus
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS ogg
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://ftp.osuosl.org/pub/xiph/releases/opus/opus-1.3.1.tar.gz
URL_HASH SHA256=65b58e1e25b2a114157014736a3d9dfeaad8d41be1c8179866f144a2fb44ff9d
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/opus/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/opus/configure
${
COMMON_ARGS
}
--with-ogg
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
ExternalProject_Add
(
opusfile
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS opus
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://ftp.osuosl.org/pub/xiph/releases/opus/opusfile-0.12.tar.gz
URL_HASH SHA256=118d8601c12dd6a44f52423e68ca9083cc9f2bfe72da7a8c1acb22a80ae3550b
PATCH_COMMAND cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/opusfile/
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/opusfile/configure
${
COMMON_ARGS
}
--disable-http
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
# OpenMP is by default compiled against GNU OpenMP, which conflicts with the version of OpenMP that PyTorch uses.
# See https://github.com/pytorch/audio/pull/1026
# TODO: Add flags like https://github.com/suphoff/pytorch_parallel_extension_cpp/blob/master/setup.py
set
(
SOX_OPTIONS
--disable-openmp
--with-amrnb
--with-amrwb
--with-flac
--with-lame
--with-oggvorbis
--with-opus
--without-alsa
--without-ao
--without-coreaudio
--without-oss
--without-id3tag
--without-ladspa
--without-mad
--without-magic
--without-png
--without-pulseaudio
--without-sndfile
--without-sndio
--without-sunaudio
--without-waveaudio
--without-wavpack
--without-twolame
)
set
(
SOX_LIBRARIES
${
INSTALL_DIR
}
/lib/libsox.a
${
INSTALL_DIR
}
/lib/libopencore-amrnb.a
${
INSTALL_DIR
}
/lib/libopencore-amrwb.a
${
INSTALL_DIR
}
/lib/libmp3lame.a
${
INSTALL_DIR
}
/lib/libFLAC.a
${
INSTALL_DIR
}
/lib/libopusfile.a
${
INSTALL_DIR
}
/lib/libopus.a
${
INSTALL_DIR
}
/lib/libvorbisenc.a
${
INSTALL_DIR
}
/lib/libvorbisfile.a
${
INSTALL_DIR
}
/lib/libvorbis.a
${
INSTALL_DIR
}
/lib/libogg.a
)
set
(
sox_depends
ogg flac vorbis opusfile lame amr
)
ExternalProject_Add
(
sox
PREFIX
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS
${
sox_depends
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
PATCH_COMMAND patch -p1 <
${
patch_dir
}
/sox.patch && cp
${
patch_dir
}
/config.guess
${
patch_dir
}
/config.sub
${
CMAKE_CURRENT_BINARY_DIR
}
/src/sox/
PATCH_COMMAND
""
CONFIGURE_COMMAND
${
CMAKE_COMMAND
}
-E env
${
envs
}
${
CMAKE_CURRENT_BINARY_DIR
}
/src/sox/configure
${
COMMON_ARGS
}
${
SOX_OPTIONS
}
CONFIGURE_COMMAND
""
BUILD_BYPRODUCTS
${
SOX_LIBRARIES
}
BUILD_COMMAND
""
DOWNLOAD_NO_PROGRESS ON
)
LOG_DOWNLOAD ON
# FetchContent_MakeAvailable will parse the downloaded content and setup the targets.
LOG_UPDATE ON
# We want to only download and not build, so we run Populate manually.
LOG_CONFIGURE ON
if
(
NOT sox_src_POPULATED
)
LOG_BUILD ON
FetchContent_Populate
(
sox_src
)
LOG_INSTALL ON
endif
()
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
add_library
(
sox SHARED stub.c
)
)
if
(
APPLE
)
set_target_properties
(
sox PROPERTIES SUFFIX .dylib
)
add_library
(
libsox INTERFACE
)
endif
(
APPLE
)
add_dependencies
(
libsox sox
)
target_include_directories
(
sox PUBLIC
${
sox_src_SOURCE_DIR
}
/src
)
target_include_directories
(
libsox INTERFACE
${
INSTALL_DIR
}
/include
)
target_link_libraries
(
libsox INTERFACE
${
SOX_LIBRARIES
}
)
third_party/sox/stub.c
0 → 100644
View file @
ffeba11a
#include <sox.h>
int
sox_add_effect
(
sox_effects_chain_t
*
chain
,
sox_effect_t
*
effp
,
sox_signalinfo_t
*
in
,
sox_signalinfo_t
const
*
out
)
{
return
-
1
;
}
int
sox_close
(
sox_format_t
*
ft
)
{
return
-
1
;
}
sox_effect_t
*
sox_create_effect
(
sox_effect_handler_t
const
*
eh
)
{
return
NULL
;
}
sox_effects_chain_t
*
sox_create_effects_chain
(
sox_encodinginfo_t
const
*
in_enc
,
sox_encodinginfo_t
const
*
out_enc
)
{
return
NULL
;
}
void
sox_delete_effect
(
sox_effect_t
*
effp
)
{}
void
sox_delete_effects_chain
(
sox_effects_chain_t
*
ecp
)
{}
int
sox_effect_options
(
sox_effect_t
*
effp
,
int
argc
,
char
*
const
argv
[])
{
return
-
1
;
}
const
sox_effect_handler_t
*
sox_find_effect
(
char
const
*
name
)
{
return
NULL
;
}
int
sox_flow_effects
(
sox_effects_chain_t
*
chain
,
int
callback
(
sox_bool
all_done
,
void
*
client_data
),
void
*
client_data
)
{
return
-
1
;
}
const
sox_effect_fn_t
*
sox_get_effect_fns
(
void
)
{
return
NULL
;
}
const
sox_format_tab_t
*
sox_get_format_fns
(
void
)
{
return
NULL
;
}
sox_globals_t
*
sox_get_globals
(
void
)
{
return
NULL
;
}
sox_format_t
*
sox_open_read
(
char
const
*
path
,
sox_signalinfo_t
const
*
signal
,
sox_encodinginfo_t
const
*
encoding
,
char
const
*
filetype
)
{
return
NULL
;
}
sox_format_t
*
sox_open_write
(
char
const
*
path
,
sox_signalinfo_t
const
*
signal
,
sox_encodinginfo_t
const
*
encoding
,
char
const
*
filetype
,
sox_oob_t
const
*
oob
,
sox_bool
overwrite_permitted
(
char
const
*
filename
))
{
return
NULL
;
}
const
char
*
sox_strerror
(
int
sox_errno
)
{
return
NULL
;
}
size_t
sox_write
(
sox_format_t
*
ft
,
const
sox_sample_t
*
buf
,
size_t
len
)
{
return
0
;
}
int
sox_init
()
{
return
-
1
;
};
int
sox_quit
()
{
return
-
1
;
};
tools/release_notes/classify_prs.py
View file @
ffeba11a
...
@@ -14,20 +14,22 @@ primary_labels_mapping = {
...
@@ -14,20 +14,22 @@ primary_labels_mapping = {
"bug fix"
:
"Bug Fixes"
,
"bug fix"
:
"Bug Fixes"
,
"new feature"
:
"New Features"
,
"new feature"
:
"New Features"
,
"improvement"
:
"Improvements"
,
"improvement"
:
"Improvements"
,
"example"
:
"Examples"
,
"prototype"
:
"Prototypes"
,
"prototype"
:
"Prototypes"
,
"other"
:
"Other"
,
"other"
:
"Other"
,
"None"
:
"Missing"
,
"None"
:
"Missing"
,
}
}
secondary_labels_mapping
=
{
secondary_labels_mapping
=
{
"module:
I/O
"
:
"I/O"
,
"module:
io
"
:
"I/O"
,
"module: ops"
:
"Ops"
,
"module: ops"
:
"Ops"
,
"module: models"
:
"Models"
,
"module: models"
:
"Models"
,
"module: pipelines"
:
"Pipelines"
,
"module: pipelines"
:
"Pipelines"
,
"module: datasets"
:
"Datasets"
,
"module: datasets"
:
"Datasets"
,
"module: docs"
:
"Documentation"
,
"module: docs"
:
"Documentation"
,
"module: tests"
:
"Tests"
,
"module: tests"
:
"Tests"
,
"tutorial"
:
"Tutorials"
,
"recipe"
:
"Recipes"
,
"example"
:
"Examples"
,
"build"
:
"Build"
,
"build"
:
"Build"
,
"style"
:
"Style"
,
"style"
:
"Style"
,
"perf"
:
"Performance"
,
"perf"
:
"Performance"
,
...
...
tools/setup_helpers/extension.py
View file @
ffeba11a
...
@@ -7,7 +7,7 @@ from pathlib import Path
...
@@ -7,7 +7,7 @@ from pathlib import Path
import
torch
import
torch
from
setuptools
import
Extension
from
setuptools
import
Extension
from
setuptools.command.build_ext
import
build_ext
from
setuptools.command.build_ext
import
build_ext
from
torch.utils.cpp_extension
import
CUDA_HOME
,
ROCM_HOME
from
torch.utils.cpp_extension
import
CUDA_HOME
__all__
=
[
__all__
=
[
"get_ext_modules"
,
"get_ext_modules"
,
...
@@ -34,12 +34,13 @@ def _get_build(var, default=False):
...
@@ -34,12 +34,13 @@ def _get_build(var, default=False):
_BUILD_SOX
=
False
if
platform
.
system
()
==
"Windows"
else
_get_build
(
"BUILD_SOX"
,
True
)
_BUILD_SOX
=
False
if
platform
.
system
()
==
"Windows"
else
_get_build
(
"BUILD_SOX"
,
True
)
_BUILD_
KALDI
=
False
if
platform
.
system
()
==
"Windows"
else
_get_build
(
"BUILD_
KALDI
"
,
True
)
_BUILD_
RIR
=
_get_build
(
"BUILD_
RIR
"
,
True
)
_BUILD_RNNT
=
_get_build
(
"BUILD_RNNT"
,
True
)
_BUILD_RNNT
=
_get_build
(
"BUILD_RNNT"
,
True
)
_BUILD_CTC_DECODER
=
_get_build
(
"BUILD_CTC_DECODER"
,
True
)
_USE_FFMPEG
=
_get_build
(
"USE_FFMPEG"
,
True
)
_USE_FFMPEG
=
_get_build
(
"USE_FFMPEG"
,
False
)
_USE_ROCM
=
_get_build
(
"USE_ROCM"
,
torch
.
backends
.
cuda
.
is_built
()
and
torch
.
version
.
hip
is
not
None
)
_USE_ROCM
=
_get_build
(
"USE_ROCM"
,
torch
.
cuda
.
is_available
()
and
torch
.
version
.
hip
is
not
None
)
_USE_CUDA
=
_get_build
(
"USE_CUDA"
,
torch
.
backends
.
cuda
.
is_built
()
and
torch
.
version
.
hip
is
None
)
_USE_CUDA
=
_get_build
(
"USE_CUDA"
,
torch
.
cuda
.
is_available
()
and
torch
.
version
.
hip
is
None
)
_BUILD_ALIGN
=
_get_build
(
"BUILD_ALIGN"
,
True
)
_BUILD_CUDA_CTC_DECODER
=
_get_build
(
"BUILD_CUDA_CTC_DECODER"
,
_USE_CUDA
)
_USE_OPENMP
=
_get_build
(
"USE_OPENMP"
,
True
)
and
"ATen parallel backend: OpenMP"
in
torch
.
__config__
.
parallel_info
()
_USE_OPENMP
=
_get_build
(
"USE_OPENMP"
,
True
)
and
"ATen parallel backend: OpenMP"
in
torch
.
__config__
.
parallel_info
()
_TORCH_CUDA_ARCH_LIST
=
os
.
environ
.
get
(
"TORCH_CUDA_ARCH_LIST"
,
None
)
_TORCH_CUDA_ARCH_LIST
=
os
.
environ
.
get
(
"TORCH_CUDA_ARCH_LIST"
,
None
)
...
@@ -47,23 +48,42 @@ _TORCH_CUDA_ARCH_LIST = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
...
@@ -47,23 +48,42 @@ _TORCH_CUDA_ARCH_LIST = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
def
get_ext_modules
():
def
get_ext_modules
():
modules
=
[
modules
=
[
Extension
(
name
=
"torchaudio.lib.libtorchaudio"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib.libtorchaudio"
,
sources
=
[]),
Extension
(
name
=
"torchaudio._torchaudio"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.
lib.
_torchaudio"
,
sources
=
[]),
]
]
if
_BUILD_
CTC_DECODER
:
if
_BUILD_
SOX
:
modules
.
extend
(
modules
.
extend
(
[
[
Extension
(
name
=
"torchaudio.lib.libflashlight-text"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib.libtorchaudio_sox"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.flashlight_lib_text_decoder"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib._torchaudio_sox"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.flashlight_lib_text_dictionary"
,
sources
=
[]),
]
]
)
)
if
_
USE_FFMPEG
:
if
_
BUILD_CUDA_CTC_DECODER
:
modules
.
extend
(
modules
.
extend
(
[
[
Extension
(
name
=
"torchaudio.lib.lib
torchaudio_ffmpeg
"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib.lib
ctc_prefix_decoder
"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.
_torchaudio_ffmpeg
"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.
lib.pybind11_prefixctc
"
,
sources
=
[]),
]
]
)
)
if
_USE_FFMPEG
:
if
"FFMPEG_ROOT"
in
os
.
environ
:
# single version ffmpeg mode
modules
.
extend
(
[
Extension
(
name
=
"torchaudio.lib.libtorchaudio_ffmpeg"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib._torchaudio_ffmpeg"
,
sources
=
[]),
]
)
else
:
modules
.
extend
(
[
Extension
(
name
=
"torchaudio.lib.libtorchaudio_ffmpeg4"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib._torchaudio_ffmpeg4"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib.libtorchaudio_ffmpeg5"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib._torchaudio_ffmpeg5"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib.libtorchaudio_ffmpeg6"
,
sources
=
[]),
Extension
(
name
=
"torchaudio.lib._torchaudio_ffmpeg6"
,
sources
=
[]),
]
)
return
modules
return
modules
...
@@ -84,10 +104,16 @@ class CMakeBuild(build_ext):
...
@@ -84,10 +104,16 @@ class CMakeBuild(build_ext):
# However, the following `cmake` command will build all of them at the same time,
# However, the following `cmake` command will build all of them at the same time,
# so, we do not need to perform `cmake` twice.
# so, we do not need to perform `cmake` twice.
# Therefore we call `cmake` only for `torchaudio._torchaudio`.
# Therefore we call `cmake` only for `torchaudio._torchaudio`.
if
ext
.
name
!=
"torchaudio.
_
torchaudio"
:
if
ext
.
name
!=
"torchaudio.
lib.lib
torchaudio"
:
return
return
extdir
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
self
.
get_ext_fullpath
(
ext
.
name
)))
# Note:
# the last part "lib" does not really matter. We want to get the full path of
# the root build directory. Passing "torchaudio" will be interpreted as
# `torchaudio.[so|dylib|pyd]`, so we need something `torchaudio.foo`, that is
# interpreted as `torchaudio/foo.so` then use dirname to get the `torchaudio`
# directory.
extdir
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
self
.
get_ext_fullpath
(
"torchaudio.lib"
)))
# required for auto-detection of auxiliary "native" libs
# required for auto-detection of auxiliary "native" libs
if
not
extdir
.
endswith
(
os
.
path
.
sep
):
if
not
extdir
.
endswith
(
os
.
path
.
sep
):
...
@@ -102,9 +128,10 @@ class CMakeBuild(build_ext):
...
@@ -102,9 +128,10 @@ class CMakeBuild(build_ext):
"-DCMAKE_VERBOSE_MAKEFILE=ON"
,
"-DCMAKE_VERBOSE_MAKEFILE=ON"
,
f
"-DPython_INCLUDE_DIR=
{
distutils
.
sysconfig
.
get_python_inc
()
}
"
,
f
"-DPython_INCLUDE_DIR=
{
distutils
.
sysconfig
.
get_python_inc
()
}
"
,
f
"-DBUILD_SOX:BOOL=
{
'ON'
if
_BUILD_SOX
else
'OFF'
}
"
,
f
"-DBUILD_SOX:BOOL=
{
'ON'
if
_BUILD_SOX
else
'OFF'
}
"
,
f
"-DBUILD_
KALDI
:BOOL=
{
'ON'
if
_BUILD_
KALDI
else
'OFF'
}
"
,
f
"-DBUILD_
RIR
:BOOL=
{
'ON'
if
_BUILD_
RIR
else
'OFF'
}
"
,
f
"-DBUILD_RNNT:BOOL=
{
'ON'
if
_BUILD_RNNT
else
'OFF'
}
"
,
f
"-DBUILD_RNNT:BOOL=
{
'ON'
if
_BUILD_RNNT
else
'OFF'
}
"
,
f
"-DBUILD_CTC_DECODER:BOOL=
{
'ON'
if
_BUILD_CTC_DECODER
else
'OFF'
}
"
,
f
"-DBUILD_ALIGN:BOOL=
{
'ON'
if
_BUILD_ALIGN
else
'OFF'
}
"
,
f
"-DBUILD_CUDA_CTC_DECODER:BOOL=
{
'ON'
if
_BUILD_CUDA_CTC_DECODER
else
'OFF'
}
"
,
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON"
,
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON"
,
f
"-DUSE_ROCM:BOOL=
{
'ON'
if
_USE_ROCM
else
'OFF'
}
"
,
f
"-DUSE_ROCM:BOOL=
{
'ON'
if
_USE_ROCM
else
'OFF'
}
"
,
f
"-DUSE_CUDA:BOOL=
{
'ON'
if
_USE_CUDA
else
'OFF'
}
"
,
f
"-DUSE_CUDA:BOOL=
{
'ON'
if
_USE_CUDA
else
'OFF'
}
"
,
...
@@ -123,10 +150,6 @@ class CMakeBuild(build_ext):
...
@@ -123,10 +150,6 @@ class CMakeBuild(build_ext):
if
platform
.
system
()
!=
"Windows"
and
CUDA_HOME
is
not
None
:
if
platform
.
system
()
!=
"Windows"
and
CUDA_HOME
is
not
None
:
cmake_args
+=
[
f
"-DCMAKE_CUDA_COMPILER='
{
CUDA_HOME
}
/bin/nvcc'"
]
cmake_args
+=
[
f
"-DCMAKE_CUDA_COMPILER='
{
CUDA_HOME
}
/bin/nvcc'"
]
cmake_args
+=
[
f
"-DCUDA_TOOLKIT_ROOT_DIR='
{
CUDA_HOME
}
'"
]
cmake_args
+=
[
f
"-DCUDA_TOOLKIT_ROOT_DIR='
{
CUDA_HOME
}
'"
]
if
platform
.
system
()
!=
"Windows"
and
ROCM_HOME
is
not
None
:
cmake_args
+=
[
f
"-DCMAKE_HIP_COMPILER='
{
ROCM_HOME
}
/bin/hipcc'"
]
cmake_args
+=
[
f
"-DCUDA_TOOLKIT_ROOT_DIR='
{
ROCM_HOME
}
'"
]
# Default to Ninja
# Default to Ninja
if
"CMAKE_GENERATOR"
not
in
os
.
environ
or
platform
.
system
()
==
"Windows"
:
if
"CMAKE_GENERATOR"
not
in
os
.
environ
or
platform
.
system
()
==
"Windows"
:
...
...
torchaudio/__init__.py
View file @
ffeba11a
from
torchaudio
import
(
# noqa: F401
from
.
import
(
# noqa: F401
_extension
,
_extension
,
compliance
,
compliance
,
datasets
,
datasets
,
...
@@ -11,7 +11,7 @@ from torchaudio import ( # noqa: F401
...
@@ -11,7 +11,7 @@ from torchaudio import ( # noqa: F401
transforms
,
transforms
,
utils
,
utils
,
)
)
from
torchaudio.backend
import
get_audio_backend
,
list_audio_backends
,
set_audio_backend
from
._backend.common
import
AudioMetaData
# noqa
try
:
try
:
from
.version
import
__version__
,
git_version
,
abi
,
dtk
,
torch_version
,
dcu_version
# noqa: F401
from
.version
import
__version__
,
git_version
,
abi
,
dtk
,
torch_version
,
dcu_version
# noqa: F401
...
@@ -19,7 +19,27 @@ try:
...
@@ -19,7 +19,27 @@ try:
except
ImportError
:
except
ImportError
:
pass
pass
def
_is_backend_dispatcher_enabled
():
import
os
return
os
.
getenv
(
"TORCHAUDIO_USE_BACKEND_DISPATCHER"
,
default
=
"1"
)
==
"1"
if
_is_backend_dispatcher_enabled
():
from
._backend
import
_init_backend
,
get_audio_backend
,
list_audio_backends
,
set_audio_backend
else
:
from
.backend
import
_init_backend
,
get_audio_backend
,
list_audio_backends
,
set_audio_backend
_init_backend
()
# for backward compatibility. This has to happen after _backend is imported.
from
.
import
backend
# noqa: F401
__all__
=
[
__all__
=
[
"AudioMetaData"
,
"io"
,
"io"
,
"compliance"
,
"compliance"
,
"datasets"
,
"datasets"
,
...
...
Prev
1
…
10
11
12
13
14
15
16
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment