OpenDAS / Torchaudio

Commit 101e0d5f authored May 16, 2019 by Jason Lian
parent acdedc4a

adding file
Showing 2 changed files with 61 additions and 38 deletions:

    torchaudio/functional.py   +54    -0
    torchaudio/transforms.py    +7   -38
torchaudio/functional.py  (new file, 0 → 100644)
import torch


def scale(tensor, factor):
    # type: (Tensor, int) -> Tensor
    if not tensor.dtype.is_floating_point:
        tensor = tensor.to(torch.float32)
    return tensor / factor
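Usage sketch, not part of the file (the import path is assumed): scale converts integer PCM samples to floats, typically with factor = 1 << 15 for 16-bit audio.

import torch
import torchaudio.functional as F       # assumed import path for the new module

pcm = torch.tensor([[-32768, 0, 16384]], dtype=torch.int16)  # fake 16-bit samples
waveform = F.scale(pcm, 1 << 15)        # cast to float32, then divide by 2**15
print(waveform)                         # tensor([[-1.0000,  0.0000,  0.5000]])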
def pad_trim(tensor, ch_dim, max_len, len_dim, fill_value):
    # type: (Tensor, int, int, int, float) -> Tensor
    assert tensor.size(ch_dim) < 128, \
        "Too many channels ({}) detected, see channels_first param.".format(tensor.size(ch_dim))
    if max_len > tensor.size(len_dim):
        padding = [max_len - tensor.size(len_dim)
                   if (i % 2 == 1) and (i // 2 != len_dim)
                   else 0
                   for i in range(4)]
        with torch.no_grad():
            tensor = torch.nn.functional.pad(tensor, padding, "constant", fill_value)
    elif max_len < tensor.size(len_dim):
        tensor = tensor.narrow(len_dim, 0, max_len)
    return tensor
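The list comprehension builds the 4-element pad spec that torch.nn.functional.pad reads in pairs starting from the last dimension, so only the right-hand side of the length dimension ends up nonzero: short signals are padded at the end, long ones trimmed. A quick sketch of both paths (same assumed import as above, not part of the file):

x = torch.zeros(1, 100)                     # (channels, length), channels_first layout
print(F.pad_trim(x, 0, 120, 1, 0.).shape)   # torch.Size([1, 120]): padded at the end
print(F.pad_trim(x, 0,  50, 1, 0.).shape)   # torch.Size([1, 50]):  trimmed via narrow()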
def downmix_mono(tensor, ch_dim):
    # type: (Tensor, int) -> Tensor
    if not tensor.dtype.is_floating_point:
        tensor = tensor.to(torch.float32)
    tensor = torch.mean(tensor, ch_dim, True)
    return tensor
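For illustration (same assumed import, not part of the file): the channel dimension is kept because torch.mean is called with keepdim=True.

stereo = torch.randn(2, 16000)          # (channels, samples)
mono = F.downmix_mono(stereo, 0)        # mean over the channel dim, keepdim=True
print(mono.shape)                       # torch.Size([1, 16000])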
def lc2cl(tensor):
    # type: (Tensor) -> Tensor
    return tensor.transpose(0, 1).contiguous()
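lc2cl flips a length-by-channel tensor to channel-by-length and makes the result contiguous in memory; a one-line sketch (not part of the file):

print(F.lc2cl(torch.randn(16000, 2)).shape)   # torch.Size([2, 16000])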
def spectrogram(sig, pad, window, n_fft, hop, ws, power, normalize):
    # type: (Tensor, int, Tensor, int, int, int, int, bool) -> Tensor
    assert sig.dim() == 2

    if pad > 0:
        with torch.no_grad():
            sig = torch.nn.functional.pad(sig, (pad, pad), "constant")
    window = window.to(sig.device)

    # default values are consistent with librosa.core.spectrum._spectrogram
    spec_f = torch.stft(sig, n_fft, hop, ws,
                        window, center=True,
                        normalized=False, onesided=True,
                        pad_mode='reflect').transpose(1, 2)
    if normalize:
        spec_f /= window.pow(2).sum().sqrt()
    spec_f = spec_f.pow(power).sum(-1)  # get power of "complex" tensor (c, l, n_fft)
    return spec_f
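A hedged end-to-end sketch, not part of the file: parameter values are illustrative, and this relies on the 2019-era torch.stft, which returns a real tensor with a trailing real/imaginary dimension that pow(power).sum(-1) collapses into a power spectrogram.

sig = torch.randn(1, 16000)             # one second of fake mono audio at 16 kHz
window = torch.hann_window(400)
spec = F.spectrogram(sig, 0, window, 400, 200, 400, 2, False)
print(spec.shape)                       # torch.Size([1, 81, 201]) -> (channel, frame, freq bin)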
torchaudio/transforms.py
@@ -2,6 +2,7 @@ from __future__ import division, print_function
 from warnings import warn
 import torch
 import numpy as np
+import functional as F


 class Compose(object):
@@ -57,10 +58,7 @@ class Scale(object):
             Tensor: Scaled by the scale factor. (default between -1.0 and 1.0)
         """
-        if not tensor.dtype.is_floating_point:
-            tensor = tensor.to(torch.float32)
-
-        return tensor / self.factor
+        return F.scale(tensor, self.factor)

     def __repr__(self):
         return self.__class__.__name__ + '()'
@@ -88,18 +86,7 @@ class PadTrim(object):
             Tensor: (c x n) or (n x c)
         """
-        assert tensor.size(self.ch_dim) < 128, \
-            "Too many channels ({}) detected, see channels_first param.".format(tensor.size(self.ch_dim))
-        if self.max_len > tensor.size(self.len_dim):
-            padding = [self.max_len - tensor.size(self.len_dim)
-                       if (i % 2 == 1) and (i // 2 != self.len_dim)
-                       else 0
-                       for i in range(4)]
-            with torch.no_grad():
-                tensor = torch.nn.functional.pad(tensor, padding, "constant", self.fill_value)
-        elif self.max_len < tensor.size(self.len_dim):
-            tensor = tensor.narrow(self.len_dim, 0, self.max_len)
-        return tensor
+        return F.pad_trim(tensor, self.ch_dim, self.max_len, self.len_dim, self.fill_value)

     def __repr__(self):
         return self.__class__.__name__ + '(max_len={0})'.format(self.max_len)
@@ -122,11 +109,7 @@ class DownmixMono(object):
         self.ch_dim = int(not channels_first)

     def __call__(self, tensor):
-        if not tensor.dtype.is_floating_point:
-            tensor = tensor.to(torch.float32)
-
-        tensor = torch.mean(tensor, self.ch_dim, True)
-        return tensor
+        return F.downmix_mono(tensor, self.ch_dim)

     def __repr__(self):
         return self.__class__.__name__ + '()'
@@ -145,7 +128,7 @@ class LC2CL(object):
         Returns:
             tensor (Tensor): Tensor of audio signal with shape (CxL)
         """
-        return tensor.transpose(0, 1).contiguous()
+        return F.lc2cl(tensor)

     def __repr__(self):
         return self.__class__.__name__ + '()'
@@ -196,22 +179,8 @@ class Spectrogram(object):
             by 2 plus 1.
         """
-        assert sig.dim() == 2
-
-        if self.pad > 0:
-            with torch.no_grad():
-                sig = torch.nn.functional.pad(sig, (self.pad, self.pad), "constant")
-        self.window = self.window.to(sig.device)
-
-        # default values are consistent with librosa.core.spectrum._spectrogram
-        spec_f = torch.stft(sig, self.n_fft, self.hop, self.ws,
-                            self.window, center=True,
-                            normalized=False, onesided=True,
-                            pad_mode='reflect').transpose(1, 2)
-        if self.normalize:
-            spec_f /= self.window.pow(2).sum().sqrt()
-        spec_f = spec_f.pow(self.power).sum(-1)  # get power of "complex" tensor (c, l, n_fft)
-        return spec_f
+        return F.spectrogram(sig, self.pad, self.window, self.n_fft, self.hop, self.ws, self.power, self.normalize)


 def F2M(*args, **kwargs):
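Net effect of the transforms.py changes: every __call__ body becomes a one-line delegation to the new functional module, so the class-based and functional APIs agree exactly. A hedged sketch of that equivalence (import paths assumed; DownmixMono's channels_first argument appears in the diff context above, but the rest of its constructor signature is not shown in this commit):

import torch
import torchaudio.functional as F            # assumed import path
import torchaudio.transforms as transforms   # assumed import path

x = torch.randn(2, 16000)                    # stereo, channels first

mono_t = transforms.DownmixMono(channels_first=True)(x)
mono_f = F.downmix_mono(x, 0)                # ch_dim = int(not channels_first) = 0
assert torch.equal(mono_t, mono_f)           # identical: the transform just delegates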