Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
23ecb772
Commit
23ecb772
authored
May 16, 2019
by
Jason Lian
Browse files
more
parent
3d21b437
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
78 additions
and
57 deletions
+78
-57
torchaudio/functional.py
torchaudio/functional.py
+72
-2
torchaudio/transforms.py
torchaudio/transforms.py
+6
-55
No files found.
torchaudio/functional.py
View file @
23ecb772
import
numpy
as
np
import
torch
import
torch
def
scale
(
tensor
,
factor
):
def
scale
(
tensor
,
factor
):
# type: (Tensor, int) -> Tensor
# type: (Tensor, int) -> Tensor
if
not
tensor
.
dtype
.
is_floating_point
:
if
not
tensor
.
dtype
.
is_floating_point
:
...
@@ -34,7 +34,7 @@ def downmix_mono(tensor, ch_dim):
...
@@ -34,7 +34,7 @@ def downmix_mono(tensor, ch_dim):
return
tensor
return
tensor
def
lc2cl
(
tensor
):
def
LC2CL
(
tensor
):
# type: (Tensor) -> Tensor
# type: (Tensor) -> Tensor
return
tensor
.
transpose
(
0
,
1
).
contiguous
()
return
tensor
.
transpose
(
0
,
1
).
contiguous
()
...
@@ -104,9 +104,79 @@ def mel_scale(spec_f, f_min, f_max, n_mels, fb=None):
...
@@ -104,9 +104,79 @@ def mel_scale(spec_f, f_min, f_max, n_mels, fb=None):
def
spectrogram_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
):
def
spectrogram_to_DB
(
spec
,
multiplier
,
amin
,
db_multiplier
,
top_db
):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
spec_db
=
multiplier
*
torch
.
log10
(
torch
.
clamp
(
spec
,
min
=
amin
))
spec_db
=
multiplier
*
torch
.
log10
(
torch
.
clamp
(
spec
,
min
=
amin
))
spec_db
-=
multiplier
*
db_multiplier
spec_db
-=
multiplier
*
db_multiplier
if
top_db
is
not
None
:
if
top_db
is
not
None
:
spec_db
=
torch
.
max
(
spec_db
,
spec_db
.
new_full
((
1
,),
spec_db
.
max
()
-
top_db
))
spec_db
=
torch
.
max
(
spec_db
,
spec_db
.
new_full
((
1
,),
spec_db
.
max
()
-
top_db
))
return
spec_db
return
spec_db
def create_dct(n_mfcc, n_mels, norm):
    # type: (int, int, Optional[str]) -> Tensor
    """
    Creates a DCT-II transformation matrix with shape (n_mels, n_mfcc),
    normalized depending on norm.

    Args:
        n_mfcc (int): number of DCT coefficients to keep (columns of the
            returned matrix).
        n_mels (int): number of input bins the transform is applied over
            (rows of the returned matrix).
        norm (str or None): if equal to 'ortho', the basis is scaled so that
            its columns are orthonormal; any other value (e.g. None) yields
            the un-normalized DCT-II scaled by 2.

    Returns:
        Tensor: the transformation matrix, to be right-multiplied to
        row-wise data, i.e. ``data.matmul(create_dct(...))``.
    """
    # http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
    n = np.arange(n_mels)
    # Column vector of coefficient indices so that broadcasting against `n`
    # produces an (n_mfcc, n_mels) grid.
    k = np.arange(n_mfcc)[:, np.newaxis]
    dct = np.cos(np.pi / n_mels * (n + 0.5) * k)
    if norm == 'ortho':
        # The k == 0 row needs an extra 1/sqrt(2) for orthonormality.
        dct[0] *= 1.0 / np.sqrt(2)
        dct *= np.sqrt(2.0 / n_mels)
    else:
        dct *= 2
    # Transpose to (n_mels, n_mfcc) so callers can right-multiply.
    return torch.Tensor(dct.T)
def MFCC(sig, mel_spect, log_mels, s2db, dct_mat):
    # type: (Tensor, Tensor, bool, Callable[[Tensor], Tensor], Tensor) -> Tensor
    """
    Computes MFCCs from an already-computed mel spectrogram.

    Args:
        sig (Tensor): not read by this function; kept so the positional
            signature used by callers stays unchanged.
        mel_spect (Tensor): mel spectrogram whose last dimension is
            multiplied against the rows of ``dct_mat``.
        log_mels (bool): if True, take ``log(mel_spect + 1e-6)``; otherwise
            pass ``mel_spect`` through ``s2db``.
        s2db (callable): called as ``s2db(mel_spect)`` when ``log_mels`` is
            False; ignored otherwise.
        dct_mat (Tensor): DCT matrix that is right-multiplied onto the
            (log / dB) mel spectrogram.

    Returns:
        Tensor: ``matmul(scaled_mel_spect, dct_mat)``.
    """
    if log_mels:
        # Small offset avoids log(0) on empty mel bins.
        log_offset = 1e-6
        scaled = torch.log(mel_spect + log_offset)
    else:
        scaled = s2db(mel_spect)
    # Move the DCT matrix to wherever the spectrogram lives before matmul.
    return torch.matmul(scaled, dct_mat.to(scaled.device))
def BLC2CBL(tensor):
    # type: (Tensor) -> Tensor
    """
    Reorders a 3-dimensional tensor so that its last dimension comes first.

    Dimensions (0, 1, 2) of the input become dimensions (1, 2, 0) of the
    output; going by the function name this is presumably (B, L, C) ->
    (C, B, L).

    Returns:
        Tensor: contiguous tensor with the permuted layout.
    """
    reordered = tensor.permute(2, 0, 1)
    return reordered.contiguous()
def mu_law_encoding(x, qc):
    # type: (Union[Tensor, np.ndarray], int) -> Union[Tensor, np.ndarray]
    """
    Applies mu-law companding and quantizes the result to integer codes.

    The companded value ``sign(x) * log1p(mu * |x|) / log1p(mu)`` with
    ``mu = qc - 1`` is mapped affinely onto ``[0, mu]`` and truncated to an
    integer after adding 0.5.

    Args:
        x (Tensor or ndarray): input signal; presumably scaled to [-1, 1]
            (not checked here). Integer tensors are converted to float first.
        qc (int): number of quantization channels; ``mu`` is ``qc - 1``.

    Returns:
        ndarray of int when ``x`` is an ndarray, LongTensor when ``x`` is a
        Tensor.

    Raises:
        TypeError: if ``x`` is neither a numpy array nor a torch Tensor.
    """
    mu = qc - 1.
    if isinstance(x, np.ndarray):
        x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
        # Shift [-1, 1] to [0, mu]; +0.5 then truncation rounds to nearest.
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).astype(int)
    elif isinstance(x, torch.Tensor):
        if not x.dtype.is_floating_point:
            x = x.to(torch.float)
        # Tensor-valued mu so torch.log1p can be applied to it.
        mu = torch.tensor(mu, dtype=x.dtype)
        x_mu = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / torch.log1p(mu)
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).long()
    else:
        # Previously this fell through to an unbound-local error on return.
        raise TypeError(
            'mu_law_encoding expects a numpy.ndarray or torch.Tensor, got '
            + type(x).__name__)
    return x_mu
def mu_law_expanding(x, qc):
    # type: (Union[Tensor, np.ndarray], int) -> Union[Tensor, np.ndarray]
    """
    Decodes mu-law quantized codes back to a companding-expanded signal.

    Codes are first mapped affinely from ``[0, mu]`` to ``[-1, 1]`` with
    ``mu = qc - 1``, then expanded with
    ``sign(v) * (exp(|v| * log1p(mu)) - 1) / mu``.

    Args:
        x (Tensor or ndarray): mu-law encoded signal (the quantized codes).
            Integer tensors are converted to float first.
        qc (int): number of quantization channels; ``mu`` is ``qc - 1``.

    Returns:
        ndarray when ``x`` is an ndarray, floating-point Tensor when ``x`` is
        a Tensor.

    Raises:
        TypeError: if ``x`` is neither a numpy array nor a torch Tensor.
    """
    # The body works on `x_mu`; the parameter is named `x`, so bind it
    # explicitly (the name was previously undefined -> NameError).
    x_mu = x
    mu = qc - 1.
    if isinstance(x_mu, np.ndarray):
        x = (x_mu / mu) * 2 - 1.
        x = np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
    elif isinstance(x_mu, torch.Tensor):
        if not x_mu.dtype.is_floating_point:
            x_mu = x_mu.to(torch.float)
        # Tensor-valued mu so torch.log1p can be applied to it.
        mu = torch.tensor(mu, dtype=x_mu.dtype)
        x = (x_mu / mu) * 2 - 1.
        x = torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
    else:
        raise TypeError(
            'mu_law_expanding expects a numpy.ndarray or torch.Tensor, got '
            + type(x_mu).__name__)
    return x
torchaudio/transforms.py
View file @
23ecb772
...
@@ -128,7 +128,7 @@ class LC2CL(object):
...
@@ -128,7 +128,7 @@ class LC2CL(object):
Returns:
Returns:
tensor (Tensor): Tensor of audio signal with shape (CxL)
tensor (Tensor): Tensor of audio signal with shape (CxL)
"""
"""
return
F
.
lc2cl
(
tensor
)
return
F
.
LC2CL
(
tensor
)
def
__repr__
(
self
):
def
__repr__
(
self
):
return
self
.
__class__
.
__name__
+
'()'
return
self
.
__class__
.
__name__
+
'()'
...
@@ -282,29 +282,9 @@ class MFCC(object):
...
@@ -282,29 +282,9 @@ class MFCC(object):
if
self
.
n_mfcc
>
self
.
MelSpectrogram
.
n_mels
:
if
self
.
n_mfcc
>
self
.
MelSpectrogram
.
n_mels
:
raise
ValueError
(
'Cannot select more MFCC coefficients than # mel bins'
)
raise
ValueError
(
'Cannot select more MFCC coefficients than # mel bins'
)
self
.
dct_mat
=
self
.
create_dct
()
self
.
dct_mat
=
F
.
create_dct
(
self
.
n_mfcc
,
self
.
MelSpectrogram
.
n_mels
,
self
.
norm
)
self
.
log_mels
=
log_mels
self
.
log_mels
=
log_mels
def
create_dct
(
self
):
"""
Creates a DCT transformation matrix with shape (num_mels, num_mfcc),
normalized depending on self.norm
Returns:
The transformation matrix, to be right-multiplied to row-wise data.
"""
outdim
=
self
.
n_mfcc
dim
=
self
.
MelSpectrogram
.
n_mels
# http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
n
=
np
.
arange
(
dim
)
k
=
np
.
arange
(
outdim
)[:,
np
.
newaxis
]
dct
=
np
.
cos
(
np
.
pi
/
dim
*
(
n
+
0.5
)
*
k
)
if
self
.
norm
==
'ortho'
:
dct
[
0
]
*=
1.0
/
np
.
sqrt
(
2
)
dct
*=
np
.
sqrt
(
2.0
/
dim
)
else
:
dct
*=
2
return
torch
.
Tensor
(
dct
.
T
)
def
__call__
(
self
,
sig
):
def
__call__
(
self
,
sig
):
"""
"""
Args:
Args:
...
@@ -315,14 +295,7 @@ class MFCC(object):
...
@@ -315,14 +295,7 @@ class MFCC(object):
is unchanged, hops is the number of hops, and n_mels is the
is unchanged, hops is the number of hops, and n_mels is the
number of mel bins.
number of mel bins.
"""
"""
mel_spect
=
self
.
MelSpectrogram
(
sig
)
return
F
.
MFCC
(
sig
,
self
.
MelSpectrogram
(
sig
),
self
.
log_mels
,
self
.
s2db
,
self
.
dct_mat
)
if
self
.
log_mels
:
log_offset
=
1e-6
mel_spect
=
torch
.
log
(
mel_spect
+
log_offset
)
else
:
mel_spect
=
self
.
s2db
(
mel_spect
)
mfcc
=
torch
.
matmul
(
mel_spect
,
self
.
dct_mat
.
to
(
mel_spect
.
device
))
return
mfcc
class
MelSpectrogram
(
object
):
class
MelSpectrogram
(
object
):
...
@@ -405,8 +378,7 @@ class BLC2CBL(object):
...
@@ -405,8 +378,7 @@ class BLC2CBL(object):
tensor (Tensor): Tensor of spectrogram with shape (CxBxL)
tensor (Tensor): Tensor of spectrogram with shape (CxBxL)
"""
"""
return
F
.
BLC2CBL
(
tensor
)
return
tensor
.
permute
(
2
,
0
,
1
).
contiguous
()
def
__repr__
(
self
):
def
__repr__
(
self
):
return
self
.
__class__
.
__name__
+
'()'
return
self
.
__class__
.
__name__
+
'()'
...
@@ -437,18 +409,7 @@ class MuLawEncoding(object):
...
@@ -437,18 +409,7 @@ class MuLawEncoding(object):
x_mu (LongTensor or ndarray)
x_mu (LongTensor or ndarray)
"""
"""
mu
=
self
.
qc
-
1.
return
self
.
mu_law_encoding
(
x
,
self
.
qc
)
if
isinstance
(
x
,
np
.
ndarray
):
x_mu
=
np
.
sign
(
x
)
*
np
.
log1p
(
mu
*
np
.
abs
(
x
))
/
np
.
log1p
(
mu
)
x_mu
=
((
x_mu
+
1
)
/
2
*
mu
+
0.5
).
astype
(
int
)
elif
isinstance
(
x
,
torch
.
Tensor
):
if
not
x
.
dtype
.
is_floating_point
:
x
=
x
.
to
(
torch
.
float
)
mu
=
torch
.
tensor
(
mu
,
dtype
=
x
.
dtype
)
x_mu
=
torch
.
sign
(
x
)
*
torch
.
log1p
(
mu
*
torch
.
abs
(
x
))
/
torch
.
log1p
(
mu
)
x_mu
=
((
x_mu
+
1
)
/
2
*
mu
+
0.5
).
long
()
return
x_mu
def
__repr__
(
self
):
def
__repr__
(
self
):
return
self
.
__class__
.
__name__
+
'()'
return
self
.
__class__
.
__name__
+
'()'
...
@@ -479,17 +440,7 @@ class MuLawExpanding(object):
...
@@ -479,17 +440,7 @@ class MuLawExpanding(object):
x (FloatTensor or ndarray)
x (FloatTensor or ndarray)
"""
"""
mu
=
self
.
qc
-
1.
return
F
.
mu_law_expanding
(
x
,
self
.
qc
)
if
isinstance
(
x_mu
,
np
.
ndarray
):
x
=
((
x_mu
)
/
mu
)
*
2
-
1.
x
=
np
.
sign
(
x
)
*
(
np
.
exp
(
np
.
abs
(
x
)
*
np
.
log1p
(
mu
))
-
1.
)
/
mu
elif
isinstance
(
x_mu
,
torch
.
Tensor
):
if
not
x_mu
.
dtype
.
is_floating_point
:
x_mu
=
x_mu
.
to
(
torch
.
float
)
mu
=
torch
.
tensor
(
mu
,
dtype
=
x_mu
.
dtype
)
x
=
((
x_mu
)
/
mu
)
*
2
-
1.
x
=
torch
.
sign
(
x
)
*
(
torch
.
exp
(
torch
.
abs
(
x
)
*
torch
.
log1p
(
mu
))
-
1.
)
/
mu
return
x
def
__repr__
(
self
):
def
__repr__
(
self
):
return
self
.
__class__
.
__name__
+
'()'
return
self
.
__class__
.
__name__
+
'()'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment