Commit 101e0d5f authored by Jason Lian's avatar Jason Lian
Browse files

adding file

parent acdedc4a
import torch
def scale(tensor, factor):
    # type: (Tensor, int) -> Tensor
    """Scale an audio tensor by dividing it by ``factor``.

    Integer (PCM) input is promoted to ``torch.float32`` first so the
    division produces floating-point output.
    """
    if tensor.dtype.is_floating_point:
        return tensor / factor
    return tensor.to(torch.float32) / factor
def pad_trim(tensor, ch_dim, max_len, len_dim, fill_value):
    # type: (Tensor, int, int, int, float) -> Tensor
    """Pad (with ``fill_value``) or trim ``tensor`` along ``len_dim`` so its
    length is exactly ``max_len``.

    ``ch_dim`` is the channel dimension; a large size there usually means the
    channels_first flag was set incorrectly, hence the sanity assert.
    """
    n_channels = tensor.size(ch_dim)
    assert n_channels < 128, \
        "Too many channels ({}) detected, see channels_first param.".format(n_channels)
    cur_len = tensor.size(len_dim)
    if cur_len < max_len:
        # F.pad takes (left, right) pairs starting from the LAST dimension;
        # pad only on the right-hand side of len_dim, zero everywhere else.
        padding = [0, 0, 0, 0]
        for i in range(4):
            if i % 2 == 1 and i // 2 != len_dim:
                padding[i] = max_len - cur_len
        with torch.no_grad():
            tensor = torch.nn.functional.pad(tensor, padding, "constant", fill_value)
    elif cur_len > max_len:
        tensor = tensor.narrow(len_dim, 0, max_len)
    return tensor
def downmix_mono(tensor, ch_dim):
    # type: (Tensor, int) -> Tensor
    """Downmix to mono by averaging the channels along ``ch_dim``.

    The channel dimension is kept (size 1) in the output; integer input is
    promoted to ``torch.float32`` before averaging.
    """
    if not tensor.dtype.is_floating_point:
        tensor = tensor.to(torch.float32)
    return tensor.mean(ch_dim, keepdim=True)
def lc2cl(tensor):
    # type: (Tensor) -> Tensor
    """Swap the first two dimensions, turning a (len, channels) layout into a
    contiguous (channels, len) one."""
    swapped = tensor.transpose(0, 1)
    return swapped.contiguous()
def spectrogram(sig, pad, window, n_fft, hop, ws, power, normalize):
    # type: (Tensor, int, Tensor, int, int, int, int, bool) -> Tensor
    """Compute the spectrogram of a 2D (channel, time) audio signal.

    Args:
        sig: audio tensor of shape (c, n); asserted 2D
        pad: amount of two-sided constant padding applied to the signal
        window: window tensor (e.g. ``torch.hann_window(ws)``)
        n_fft: FFT size
        hop: hop length between STFT frames
        ws: window size
        power: exponent applied to the real and imaginary parts before the
            final sum (2 yields the power spectrogram)
        normalize: if True, divide by the window's L2 norm before summing

    Returns:
        Tensor of shape (c, frames, n_fft // 2 + 1).
    """
    assert sig.dim() == 2

    if pad > 0:
        with torch.no_grad():
            sig = torch.nn.functional.pad(sig, (pad, pad), "constant")
    # make sure the window lives on the same device as the signal
    window = window.to(sig.device)
    # default values are consistent with librosa.core.spectrum._spectrogram
    # FIX: current PyTorch requires return_complex for real input; request a
    # complex result and re-expose the legacy trailing (real, imag) dimension
    # with view_as_real so the output is byte-identical to the old API's.
    spec_f = torch.stft(sig, n_fft, hop, ws, window,
                        center=True, normalized=False, onesided=True,
                        pad_mode='reflect', return_complex=True)
    spec_f = torch.view_as_real(spec_f).transpose(1, 2)
    if normalize:
        spec_f /= window.pow(2).sum().sqrt()
    # get power of "complex" tensor (c, l, n_fft // 2 + 1)
    spec_f = spec_f.pow(power).sum(-1)
    return spec_f
......@@ -2,6 +2,7 @@ from __future__ import division, print_function
from warnings import warn
import torch
import numpy as np
import functional as F
class Compose(object):
......@@ -57,10 +58,7 @@ class Scale(object):
Tensor: Scaled by the scale factor. (default between -1.0 and 1.0)
"""
if not tensor.dtype.is_floating_point:
tensor = tensor.to(torch.float32)
return tensor / self.factor
return F.scale(tensor, factor)
def __repr__(self):
return self.__class__.__name__ + '()'
......@@ -88,18 +86,7 @@ class PadTrim(object):
Tensor: (c x n) or (n x c)
"""
assert tensor.size(self.ch_dim) < 128, \
"Too many channels ({}) detected, see channels_first param.".format(tensor.size(self.ch_dim))
if self.max_len > tensor.size(self.len_dim):
padding = [self.max_len - tensor.size(self.len_dim)
if (i % 2 == 1) and (i // 2 != self.len_dim)
else 0
for i in range(4)]
with torch.no_grad():
tensor = torch.nn.functional.pad(tensor, padding, "constant", self.fill_value)
elif self.max_len < tensor.size(self.len_dim):
tensor = tensor.narrow(self.len_dim, 0, self.max_len)
return tensor
return F.pad_trim(tensor, self.ch_dim, self.max_len, self.len_dim, self.fill_value)
def __repr__(self):
return self.__class__.__name__ + '(max_len={0})'.format(self.max_len)
......@@ -122,11 +109,7 @@ class DownmixMono(object):
self.ch_dim = int(not channels_first)
def __call__(self, tensor):
if not tensor.dtype.is_floating_point:
tensor = tensor.to(torch.float32)
tensor = torch.mean(tensor, self.ch_dim, True)
return tensor
return F.downmix_mono(tensor, self.ch_dim)
def __repr__(self):
return self.__class__.__name__ + '()'
......@@ -145,7 +128,7 @@ class LC2CL(object):
Returns:
tensor (Tensor): Tensor of audio signal with shape (CxL)
"""
return tensor.transpose(0, 1).contiguous()
return F.lc2cl(tensor)
def __repr__(self):
return self.__class__.__name__ + '()'
......@@ -196,22 +179,8 @@ class Spectrogram(object):
by 2 plus 1.
"""
assert sig.dim() == 2
if self.pad > 0:
with torch.no_grad():
sig = torch.nn.functional.pad(sig, (self.pad, self.pad), "constant")
self.window = self.window.to(sig.device)
# default values are consistent with librosa.core.spectrum._spectrogram
spec_f = torch.stft(sig, self.n_fft, self.hop, self.ws,
self.window, center=True,
normalized=False, onesided=True,
pad_mode='reflect').transpose(1, 2)
if self.normalize:
spec_f /= self.window.pow(2).sum().sqrt()
spec_f = spec_f.pow(self.power).sum(-1) # get power of "complex" tensor (c, l, n_fft)
return spec_f
return F.spectrogram(sig, self.pad, self.window, self.n_fft, self.hop,
self.ws, self.power, self.normalize)
def F2M(*args, **kwargs):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment