Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
873af313
Commit
873af313
authored
Jul 26, 2019
by
jamarshon
Committed by
cpuhrsch
Jul 26, 2019
Browse files
Rename SpectrogramToDB to AmplitudeToDB (#170)
parent
d3fe2a77
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
32 deletions
+32
-32
test/test_jit.py
test/test_jit.py
+5
-5
test/test_transforms.py
test/test_transforms.py
+2
-2
torchaudio/functional.py
torchaudio/functional.py
+13
-13
torchaudio/transforms.py
torchaudio/transforms.py
+12
-12
No files found.
test/test_jit.py
View file @
873af313
...
...
@@ -78,11 +78,11 @@ class Test_JIT(unittest.TestCase):
self
.
_test_script_module
(
spec_f
,
transforms
.
MelScale
)
def
test_torchscript_
spectrogram
_to_DB
(
self
):
def
test_torchscript_
amplitude
_to_DB
(
self
):
@
torch
.
jit
.
script
def
jit_method
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
return
F
.
spectrogram
_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
return
F
.
amplitude
_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
spec
=
torch
.
rand
((
6
,
201
))
multiplier
=
10.
...
...
@@ -91,15 +91,15 @@ class Test_JIT(unittest.TestCase):
top_db
=
80.
jit_out
=
jit_method
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
py_out
=
F
.
spectrogram
_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
py_out
=
F
.
amplitude
_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
)
self
.
assertTrue
(
torch
.
allclose
(
jit_out
,
py_out
))
@
unittest
.
skipIf
(
not
RUN_CUDA
,
"no CUDA"
)
def
test_scriptmodule_
Spectrogram
ToDB
(
self
):
def
test_scriptmodule_
Amplitude
ToDB
(
self
):
spec
=
torch
.
rand
((
6
,
201
),
device
=
"cuda"
)
self
.
_test_script_module
(
spec
,
transforms
.
Spectrogram
ToDB
)
self
.
_test_script_module
(
spec
,
transforms
.
Amplitude
ToDB
)
def
test_torchscript_create_dct
(
self
):
@
torch
.
jit
.
script
...
...
test/test_transforms.py
View file @
873af313
...
...
@@ -52,7 +52,7 @@ class Tester(unittest.TestCase):
def
test_mel2
(
self
):
top_db
=
80.
s2db
=
transforms
.
Spectrogram
ToDB
(
'power'
,
top_db
)
s2db
=
transforms
.
Amplitude
ToDB
(
'power'
,
top_db
)
waveform
=
self
.
waveform
.
clone
()
# (1, 16000)
waveform_scaled
=
self
.
scale
(
waveform
)
# (1, 16000)
...
...
@@ -155,7 +155,7 @@ class Tester(unittest.TestCase):
self
.
assertTrue
(
torch
.
allclose
(
torch_mel
.
type
(
librosa_mel_tensor
.
dtype
),
librosa_mel_tensor
,
atol
=
5e-3
))
# test s2db
db_transform
=
torchaudio
.
transforms
.
Spectrogram
ToDB
(
'power'
,
80.
)
db_transform
=
torchaudio
.
transforms
.
Amplitude
ToDB
(
'power'
,
80.
)
db_torch
=
db_transform
(
spect_transform
(
sound
)).
squeeze
().
cpu
()
db_librosa
=
librosa
.
core
.
spectrum
.
power_to_db
(
out_librosa
)
self
.
assertTrue
(
torch
.
allclose
(
db_torch
,
torch
.
from_numpy
(
db_librosa
),
atol
=
5e-3
))
...
...
torchaudio/functional.py
View file @
873af313
...
...
@@ -5,7 +5,7 @@ import torch
__all__
=
[
'istft'
,
'spectrogram'
,
'
spectrogram
_to_DB'
,
'
amplitude
_to_DB'
,
'create_fb_matrix'
,
'create_dct'
,
'mu_law_encoding'
,
...
...
@@ -207,34 +207,34 @@ def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, nor
@
torch
.
jit
.
script
def
spectrogram_to_DB
(
specgram
,
multiplier
,
amin
,
db_multiplier
,
top_db
=
None
):
def
amplitude_to_DB
(
x
,
multiplier
,
amin
,
db_multiplier
,
top_db
=
None
):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
r
"""Turns a
spectrogram
from the power/amplitude scale to the decibel scale.
r
"""Turns a
tensor
from the power/amplitude scale to the decibel scale.
This output depends on the maximum value in the input
spectrogram
, and so
This output depends on the maximum value in the input
tensor
, and so
may return different values for an audio clip split into snippets vs. a
full clip.
Args:
specgram
(torch.Tensor):
Normal STFT of size (c, f, t)
x
(torch.Tensor):
Input tensor before being converted to decibel scale
multiplier (float): Use 10. for power and 20. for amplitude
amin (float): Number to clamp
specgram
amin (float): Number to clamp
``x``
db_multiplier (float): Log10(max(reference value and amin))
top_db (Optional[float]): Minimum negative cut-off in decibels. A reasonable number
is 80.
Returns:
torch.Tensor:
Spectrogram in DB of size (c, f, t)
torch.Tensor:
Output tensor in decibel scale
"""
specgram
_db
=
multiplier
*
torch
.
log10
(
torch
.
clamp
(
specgram
,
min
=
amin
))
specgram
_db
-=
multiplier
*
db_multiplier
x
_db
=
multiplier
*
torch
.
log10
(
torch
.
clamp
(
x
,
min
=
amin
))
x
_db
-=
multiplier
*
db_multiplier
if
top_db
is
not
None
:
new_
spec
_db_max
=
torch
.
tensor
(
float
(
specgram
_db
.
max
())
-
top_db
,
dtype
=
specgram
_db
.
dtype
,
device
=
specgram
_db
.
device
)
specgram
_db
=
torch
.
max
(
specgram
_db
,
new_
spec
_db_max
)
new_
x
_db_max
=
torch
.
tensor
(
float
(
x
_db
.
max
())
-
top_db
,
dtype
=
x
_db
.
dtype
,
device
=
x
_db
.
device
)
x
_db
=
torch
.
max
(
x
_db
,
new_
x
_db_max
)
return
specgram
_db
return
x
_db
@
torch
.
jit
.
script
...
...
torchaudio/transforms.py
View file @
873af313
...
...
@@ -9,7 +9,7 @@ from .compliance import kaldi
__all__
=
[
'Spectrogram'
,
'
Spectrogram
ToDB'
,
'
Amplitude
ToDB'
,
'MelScale'
,
'MelSpectrogram'
,
'MFCC'
,
...
...
@@ -67,15 +67,15 @@ class Spectrogram(torch.jit.ScriptModule):
self
.
win_length
,
self
.
power
,
self
.
normalized
)
class
Spectrogram
ToDB
(
torch
.
jit
.
ScriptModule
):
r
"""Turns a
spectrogram
from the power/amplitude scale to the decibel scale.
class
Amplitude
ToDB
(
torch
.
jit
.
ScriptModule
):
r
"""Turns a
tensor
from the power/amplitude scale to the decibel scale.
This output depends on the maximum value in the input
spectrogram
, and so
This output depends on the maximum value in the input
tensor
, and so
may return different values for an audio clip split into snippets vs. a
full clip.
Args:
stype (str): scale of input
spectrogram
('power' or 'magnitude'). The
stype (str): scale of input
tensor
('power' or 'magnitude'). The
power being the elementwise square of the magnitude. (Default: 'power')
top_db (float, optional): minimum negative cut-off in decibels. A reasonable number
is 80.
...
...
@@ -83,7 +83,7 @@ class SpectrogramToDB(torch.jit.ScriptModule):
__constants__
=
[
'multiplier'
,
'amin'
,
'ref_value'
,
'db_multiplier'
]
def
__init__
(
self
,
stype
=
'power'
,
top_db
=
None
):
super
(
Spectrogram
ToDB
,
self
).
__init__
()
super
(
Amplitude
ToDB
,
self
).
__init__
()
self
.
stype
=
torch
.
jit
.
Attribute
(
stype
,
str
)
if
top_db
is
not
None
and
top_db
<
0
:
raise
ValueError
(
'top_db must be positive value'
)
...
...
@@ -94,17 +94,17 @@ class SpectrogramToDB(torch.jit.ScriptModule):
self
.
db_multiplier
=
math
.
log10
(
max
(
self
.
amin
,
self
.
ref_value
))
@
torch
.
jit
.
script_method
def
forward
(
self
,
specgram
):
def
forward
(
self
,
x
):
r
"""Numerically stable implementation from Librosa
https://librosa.github.io/librosa/_modules/librosa/core/spectrum.html
Args:
specgram
(torch.Tensor):
STFT of size (c, f, t)
x
(torch.Tensor):
Input tensor before being converted to decibel scale
Returns:
torch.Tensor:
STFT after changing scale of size (c, f, t)
torch.Tensor:
Output tensor in decibel scale
"""
return
F
.
spectrogram_to_DB
(
specgram
,
self
.
multiplier
,
self
.
amin
,
self
.
db_multiplier
,
self
.
top_db
)
return
F
.
amplitude_to_DB
(
x
,
self
.
multiplier
,
self
.
amin
,
self
.
db_multiplier
,
self
.
top_db
)
class
MelScale
(
torch
.
jit
.
ScriptModule
):
...
...
@@ -246,7 +246,7 @@ class MFCC(torch.jit.ScriptModule):
self
.
dct_type
=
dct_type
self
.
norm
=
torch
.
jit
.
Attribute
(
norm
,
Optional
[
str
])
self
.
top_db
=
80.0
self
.
spectrogram_to_DB
=
Spectrogram
ToDB
(
'power'
,
self
.
top_db
)
self
.
amplitude_to_DB
=
Amplitude
ToDB
(
'power'
,
self
.
top_db
)
if
melkwargs
is
not
None
:
self
.
MelSpectrogram
=
MelSpectrogram
(
sample_rate
=
self
.
sample_rate
,
**
melkwargs
)
...
...
@@ -273,7 +273,7 @@ class MFCC(torch.jit.ScriptModule):
log_offset
=
1e-6
mel_specgram
=
torch
.
log
(
mel_specgram
+
log_offset
)
else
:
mel_specgram
=
self
.
spectrogram
_to_DB
(
mel_specgram
)
mel_specgram
=
self
.
amplitude
_to_DB
(
mel_specgram
)
# (c, `n_mels`, t).transpose(...) dot (`n_mels`, `n_mfcc`) -> (c, t, `n_mfcc`).transpose(...)
mfcc
=
torch
.
matmul
(
mel_specgram
.
transpose
(
1
,
2
),
self
.
dct_mat
).
transpose
(
1
,
2
)
return
mfcc
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment