Commit 23ecb772 authored by Jason Lian's avatar Jason Lian
Browse files

more

parent 3d21b437
import numpy as np
import torch import torch
def scale(tensor, factor): def scale(tensor, factor):
# type: (Tensor, int) -> Tensor # type: (Tensor, int) -> Tensor
if not tensor.dtype.is_floating_point: if not tensor.dtype.is_floating_point:
...@@ -34,7 +34,7 @@ def downmix_mono(tensor, ch_dim): ...@@ -34,7 +34,7 @@ def downmix_mono(tensor, ch_dim):
return tensor return tensor
def lc2cl(tensor): def LC2CL(tensor):
# type: (Tensor) -> Tensor # type: (Tensor) -> Tensor
return tensor.transpose(0, 1).contiguous() return tensor.transpose(0, 1).contiguous()
...@@ -104,9 +104,79 @@ def mel_scale(spec_f, f_min, f_max, n_mels, fb=None): ...@@ -104,9 +104,79 @@ def mel_scale(spec_f, f_min, f_max, n_mels, fb=None):
def spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db): def spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
spec_db = multiplier * torch.log10(torch.clamp(spec, min=amin)) spec_db = multiplier * torch.log10(torch.clamp(spec, min=amin))
spec_db -= multiplier * db_multiplier spec_db -= multiplier * db_multiplier
if top_db is not None: if top_db is not None:
spec_db = torch.max(spec_db, spec_db.new_full((1,), spec_db.max() - top_db)) spec_db = torch.max(spec_db, spec_db.new_full((1,), spec_db.max() - top_db))
return spec_db return spec_db
def create_dct(n_mfcc, n_mels, norm):
    # type: (int, int, str) -> Tensor
    """
    Creates a DCT transformation matrix with shape (n_mels, n_mfcc),
    normalized depending on norm.

    Args:
        n_mfcc (int): number of MFCC coefficients to keep (output dimension)
        n_mels (int): number of mel bins (input dimension)
        norm (str): 'ortho' applies orthonormal DCT-II scaling; any other
            value leaves the unnormalized DCT-II coefficients doubled

    Returns:
        Tensor: the transformation matrix (n_mels x n_mfcc), to be
        right-multiplied to row-wise data.
    """
    # http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
    n = np.arange(n_mels)
    k = np.arange(n_mfcc)[:, np.newaxis]
    dct = np.cos(np.pi / n_mels * (n + 0.5) * k)
    if norm == 'ortho':
        # Orthonormal scaling: the first basis vector gets an extra
        # 1/sqrt(2) factor so the matrix columns are orthonormal.
        dct[0] *= 1.0 / np.sqrt(2)
        dct *= np.sqrt(2.0 / n_mels)
    else:
        dct *= 2
    return torch.Tensor(dct.T)
def MFCC(sig, mel_spect, log_mels, s2db, dct_mat):
    # type: (Tensor, MelSpectrogram, bool, SpectrogramToDB, Tensor) -> Tensor
    """Turn a mel spectrogram into MFCC coefficients.

    Compresses the mel spectrogram (natural log when ``log_mels`` is True,
    otherwise via the ``s2db`` decibel transform), then projects it onto the
    DCT basis ``dct_mat``.

    NOTE(review): ``sig`` is accepted for interface compatibility but is not
    read here — the mel spectrogram is computed by the caller.
    """
    if log_mels:
        # Small offset keeps log() finite on zero-valued mel bins.
        compressed = torch.log(mel_spect + 1e-6)
    else:
        compressed = s2db(mel_spect)
    return torch.matmul(compressed, dct_mat.to(compressed.device))
def BLC2CBL(tensor):
    # type: (Tensor) -> Tensor
    """Reorder a (B, L, C) tensor into (C, B, L) layout.

    Returns a contiguous copy so downstream ops that require contiguous
    memory (e.g. ``view``) work on the result.
    """
    reordered = tensor.permute(2, 0, 1)
    return reordered.contiguous()
def mu_law_encoding(x, qc):
    # type: (Union[Tensor, np.ndarray], int) -> Union[Tensor, np.ndarray]
    """Encode a signal with mu-law companding.

    Maps a signal (expected in [-1, 1]) into ``qc`` integer quantization
    levels in [0, qc - 1].

    Args:
        x (Tensor or ndarray): input signal
        qc (int): number of quantization channels (e.g. 256)

    Returns:
        LongTensor or int ndarray: the quantized mu-law codes

    Raises:
        TypeError: if ``x`` is neither a torch.Tensor nor an np.ndarray
    """
    mu = qc - 1.
    if isinstance(x, np.ndarray):
        x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).astype(int)
    elif isinstance(x, torch.Tensor):
        if not x.dtype.is_floating_point:
            x = x.to(torch.float)
        mu = torch.tensor(mu, dtype=x.dtype)
        x_mu = torch.sign(x) * torch.log1p(mu *
                                           torch.abs(x)) / torch.log1p(mu)
        x_mu = ((x_mu + 1) / 2 * mu + 0.5).long()
    else:
        # Previously this fell through to `return x_mu` and raised a
        # confusing NameError; fail loudly with the actual problem instead.
        raise TypeError('mu_law_encoding expects a torch.Tensor or '
                        'np.ndarray, got {}'.format(type(x).__name__))
    return x_mu
def mu_law_expanding(x, qc):
    # type: (Union[Tensor, np.ndarray], int) -> Union[Tensor, np.ndarray]
    """Decode (expand) a mu-law encoded signal.

    Inverse of ``mu_law_encoding``: maps integer codes in [0, qc - 1] back
    to a signal in [-1, 1].

    Args:
        x (Tensor or ndarray): mu-law encoded codes
        qc (int): number of quantization channels (e.g. 256)

    Returns:
        FloatTensor or float ndarray: the decoded signal

    Raises:
        TypeError: if ``x`` is neither a torch.Tensor nor an np.ndarray
    """
    mu = qc - 1.
    # BUG FIX: the body referenced ``x_mu`` which was never defined
    # (the parameter is named ``x``), so every call raised NameError.
    x_mu = x
    if isinstance(x_mu, np.ndarray):
        x = ((x_mu) / mu) * 2 - 1.
        x = np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
    elif isinstance(x_mu, torch.Tensor):
        if not x_mu.dtype.is_floating_point:
            x_mu = x_mu.to(torch.float)
        mu = torch.tensor(mu, dtype=x_mu.dtype)
        x = ((x_mu) / mu) * 2 - 1.
        x = torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
    else:
        raise TypeError('mu_law_expanding expects a torch.Tensor or '
                        'np.ndarray, got {}'.format(type(x_mu).__name__))
    return x
...@@ -128,7 +128,7 @@ class LC2CL(object): ...@@ -128,7 +128,7 @@ class LC2CL(object):
Returns: Returns:
tensor (Tensor): Tensor of audio signal with shape (CxL) tensor (Tensor): Tensor of audio signal with shape (CxL)
""" """
return F.lc2cl(tensor) return F.LC2CL(tensor)
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + '()' return self.__class__.__name__ + '()'
...@@ -282,29 +282,9 @@ class MFCC(object): ...@@ -282,29 +282,9 @@ class MFCC(object):
if self.n_mfcc > self.MelSpectrogram.n_mels: if self.n_mfcc > self.MelSpectrogram.n_mels:
raise ValueError('Cannot select more MFCC coefficients than # mel bins') raise ValueError('Cannot select more MFCC coefficients than # mel bins')
self.dct_mat = self.create_dct() self.dct_mat = F.create_dct(self.n_mfcc, self.MelSpectrogram.n_mels, self.norm)
self.log_mels = log_mels self.log_mels = log_mels
def create_dct(self):
"""
Creates a DCT transformation matrix with shape (num_mels, num_mfcc),
normalized depending on self.norm
Returns:
The transformation matrix, to be right-multiplied to row-wise data.
"""
outdim = self.n_mfcc
dim = self.MelSpectrogram.n_mels
# http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
n = np.arange(dim)
k = np.arange(outdim)[:, np.newaxis]
dct = np.cos(np.pi / dim * (n + 0.5) * k)
if self.norm == 'ortho':
dct[0] *= 1.0 / np.sqrt(2)
dct *= np.sqrt(2.0 / dim)
else:
dct *= 2
return torch.Tensor(dct.T)
def __call__(self, sig): def __call__(self, sig):
""" """
Args: Args:
...@@ -315,14 +295,7 @@ class MFCC(object): ...@@ -315,14 +295,7 @@ class MFCC(object):
is unchanged, hops is the number of hops, and n_mels is the is unchanged, hops is the number of hops, and n_mels is the
number of mel bins. number of mel bins.
""" """
mel_spect = self.MelSpectrogram(sig) return F.MFCC(sig, self.MelSpectrogram(sig), self.log_mels, self.s2db, self.dct_mat)
if self.log_mels:
log_offset = 1e-6
mel_spect = torch.log(mel_spect + log_offset)
else:
mel_spect = self.s2db(mel_spect)
mfcc = torch.matmul(mel_spect, self.dct_mat.to(mel_spect.device))
return mfcc
class MelSpectrogram(object): class MelSpectrogram(object):
...@@ -405,8 +378,7 @@ class BLC2CBL(object): ...@@ -405,8 +378,7 @@ class BLC2CBL(object):
tensor (Tensor): Tensor of spectrogram with shape (CxBxL) tensor (Tensor): Tensor of spectrogram with shape (CxBxL)
""" """
return F.BLC2CBL(tensor)
return tensor.permute(2, 0, 1).contiguous()
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + '()' return self.__class__.__name__ + '()'
...@@ -437,18 +409,7 @@ class MuLawEncoding(object): ...@@ -437,18 +409,7 @@ class MuLawEncoding(object):
x_mu (LongTensor or ndarray) x_mu (LongTensor or ndarray)
""" """
mu = self.qc - 1. return self.mu_law_encoding(x, self.qc)
if isinstance(x, np.ndarray):
x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
x_mu = ((x_mu + 1) / 2 * mu + 0.5).astype(int)
elif isinstance(x, torch.Tensor):
if not x.dtype.is_floating_point:
x = x.to(torch.float)
mu = torch.tensor(mu, dtype=x.dtype)
x_mu = torch.sign(x) * torch.log1p(mu *
torch.abs(x)) / torch.log1p(mu)
x_mu = ((x_mu + 1) / 2 * mu + 0.5).long()
return x_mu
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + '()' return self.__class__.__name__ + '()'
...@@ -479,17 +440,7 @@ class MuLawExpanding(object): ...@@ -479,17 +440,7 @@ class MuLawExpanding(object):
x (FloatTensor or ndarray) x (FloatTensor or ndarray)
""" """
mu = self.qc - 1. return F.mu_law_expanding(x, self.qc)
if isinstance(x_mu, np.ndarray):
x = ((x_mu) / mu) * 2 - 1.
x = np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
elif isinstance(x_mu, torch.Tensor):
if not x_mu.dtype.is_floating_point:
x_mu = x_mu.to(torch.float)
mu = torch.tensor(mu, dtype=x_mu.dtype)
x = ((x_mu) / mu) * 2 - 1.
x = torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
return x
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + '()' return self.__class__.__name__ + '()'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment