Commit 3bd4db86 authored by David Pollack, committed by Soumith Chintala

refactoring and cleaning up code

parent 0e0d1e59
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 2000000
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
---
# NOTE there must be no spaces before the '-', so put the comma first.
Checks: '
-*
,bugprone-*
,-bugprone-forward-declaration-namespace
,-bugprone-macro-parentheses
,cppcoreguidelines-*
,-cppcoreguidelines-interfaces-global-init
,-cppcoreguidelines-owning-memory
,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
,-cppcoreguidelines-pro-bounds-constant-array-index
,-cppcoreguidelines-pro-bounds-pointer-arithmetic
,-cppcoreguidelines-pro-type-cstyle-cast
,-cppcoreguidelines-pro-type-reinterpret-cast
,-cppcoreguidelines-pro-type-static-cast-downcast
,-cppcoreguidelines-pro-type-union-access
,-cppcoreguidelines-pro-type-vararg
,-cppcoreguidelines-special-member-functions
,hicpp-exception-baseclass
,hicpp-avoid-goto
,modernize-*
,-modernize-return-braced-init-list
,-modernize-use-auto
,-modernize-use-default-member-init
,-modernize-use-using
,performance-unnecessary-value-param
'
WarningsAsErrors: '*'
HeaderFilterRegex: 'torchaudio/.*'
AnalyzeTemporaryDtors: false
CheckOptions:
...
[flake8]
max-line-length = 120
ignore = E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504
exclude = build,docs/source,_ext
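Taken together, the three configs above (.clang-format, .clang-tidy, and the [flake8] section) define the project's formatting and lint rules. Below is a minimal sketch, not part of the commit, of driving the tools from Python; tool availability and the flake8 target paths are assumptions, while torchaudio/torch_sox.cpp is the extension source shipped in this commit.

import subprocess

cpp_file = "torchaudio/torch_sox.cpp"  # C++ extension source from this commit

# clang-format discovers the .clang-format file above via -style=file
subprocess.run(["clang-format", "-i", "-style=file", cpp_file], check=True)

# clang-tidy reads .clang-tidy from the source tree; compile flags follow "--"
subprocess.run(["clang-tidy", cpp_file, "--", "-std=c++11"], check=True)

# flake8 honors the [flake8] section (max-line-length, ignore, exclude); paths are assumptions
subprocess.run(["flake8", "torchaudio", "test"], check=True)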
#!/usr/bin/env python
import os
import platform
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CppExtension
def check_env_flag(name, default=''):
return os.getenv(name, default).upper() in set(['ON', '1', 'YES', 'TRUE', 'Y'])
DEBUG = check_env_flag('DEBUG')
eca = []
ela = []
if DEBUG:
if platform.system() == 'Windows':
ela += ['/DEBUG:FULL']
else:
eca += ['-O0', '-g']
ela += ['-O0', '-g']
setup(
name="torchaudio",
version="0.2",
......@@ -14,6 +30,10 @@ setup(
packages=find_packages(exclude=["build"]),
ext_modules=[
CppExtension(
'_torch_sox', ['torchaudio/torch_sox.cpp'], libraries=['sox']),
'_torch_sox',
['torchaudio/torch_sox.cpp'],
libraries=['sox'],
extra_compile_args=eca,
extra_link_args=ela),
],
cmdclass={'build_ext': BuildExtension})
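For reference, a sketch of how the DEBUG flag handled above would be set at build time; the build command itself is an assumption, not part of the commit.

import os
import subprocess
import sys

# check_env_flag('DEBUG') treats ON/1/YES/TRUE/Y as true, adding -O0 -g
# (or /DEBUG:FULL on Windows) to the extension build
env = dict(os.environ, DEBUG="1")
subprocess.run([sys.executable, "setup.py", "build_ext", "--inplace"], env=env, check=True)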
......@@ -27,7 +27,6 @@ class Test_LoadSave(unittest.TestCase):
os.unlink(new_filepath)
# test save 1d tensor
#x = x[:, 0] # get mono signal
x = x[0, :] # get mono signal
x.squeeze_() # remove channel dim
torchaudio.save(new_filepath, x, sr)
......@@ -91,7 +90,7 @@ class Test_LoadSave(unittest.TestCase):
offset = 15
x, _ = torchaudio.load(self.test_filepath)
x_offset, _ = torchaudio.load(self.test_filepath, offset=offset)
self.assertTrue(x[:,offset:].allclose(x_offset))
self.assertTrue(x[:, offset:].allclose(x_offset))
# check number of frames
n = 201
......@@ -132,7 +131,7 @@ class Test_LoadSave(unittest.TestCase):
input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
x_sine_full, sr_sine = torchaudio.load(input_sine_path)
x_sine_part, _ = torchaudio.load(input_sine_path, num_frames=num_frames, offset=offset)
l1_error = x_sine_full[:, offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
l1_error = x_sine_full[:, offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
# test for the correct number of samples and that the correct portion was loaded
self.assertEqual(x_sine_part.size(1), num_frames)
self.assertEqual(l1_error, 0.)
......@@ -148,7 +147,7 @@ class Test_LoadSave(unittest.TestCase):
# test with two channel mp3
x_2ch_full, sr_2ch = torchaudio.load(self.test_filepath, normalization=True)
x_2ch_part, _ = torchaudio.load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
l1_error = x_2ch_full[:, offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
l1_error = x_2ch_full[:, offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
self.assertEqual(x_2ch_part.size(1), num_frames)
self.assertEqual(l1_error, 0.)
......
......@@ -30,13 +30,14 @@ class TORCHAUDIODS(Dataset):
def __len__(self):
return len(self.data)
class Test_DataLoader(unittest.TestCase):
def test_1(self):
expected_size = (2, 1, 16000)
ds = TORCHAUDIODS()
dl = DataLoader(ds, batch_size=2)
for x in dl:
#print(x.size())
# print(x.size())
continue
self.assertTrue(x.size() == expected_size)
......
......@@ -120,7 +120,7 @@ class Test_LoadSave(unittest.TestCase):
input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
x_sine_full, sr_sine = load(input_sine_path)
x_sine_part, _ = load(input_sine_path, num_frames=num_frames, offset=offset)
l1_error = x_sine_full[offset:(num_frames+offset)].sub(x_sine_part).abs().sum().item()
l1_error = x_sine_full[offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
# test for the correct number of samples and that the correct portion was loaded
self.assertEqual(x_sine_part.size(0), num_frames)
self.assertEqual(l1_error, 0.)
......@@ -137,7 +137,7 @@ class Test_LoadSave(unittest.TestCase):
# test with two channel mp3
x_2ch_full, sr_2ch = load(self.test_filepath, normalization=True)
x_2ch_part, _ = load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
l1_error = x_2ch_full[offset:(offset+num_frames)].sub(x_2ch_part).abs().sum().item()
l1_error = x_2ch_full[offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
self.assertEqual(x_2ch_part.size(0), num_frames)
self.assertEqual(l1_error, 0.)
......
......@@ -17,7 +17,7 @@ class Test_SoxEffectsChain(unittest.TestCase):
E.append_effect_to_chain("echos", [0.8, 0.7, 40, 0.25, 63, 0.3])
x, sr = E.sox_build_flow_effects()
# check if effects worked
#print(x.size())
# print(x.size())
def test_rate_channels(self):
target_rate = 16000
......@@ -154,7 +154,7 @@ class Test_SoxEffectsChain(unittest.TestCase):
E.append_effect_to_chain("trim", [offset, num_frames])
x, sr = E.sox_build_flow_effects()
# check if effect worked
self.assertTrue(x.allclose(x_orig[:,offset_int:(offset_int+num_frames_int)], rtol=1e-4, atol=1e-4))
self.assertTrue(x.allclose(x_orig[:, offset_int:(offset_int + num_frames_int)], rtol=1e-4, atol=1e-4))
def test_silence_contrast(self):
si, _ = torchaudio.info(self.test_filepath)
......@@ -183,13 +183,14 @@ class Test_SoxEffectsChain(unittest.TestCase):
E.append_effect_to_chain("fade", ["q", "0.25", "0", "0.33"])
x, _ = E.sox_build_flow_effects()
# check if effect worked
#print(x.size())
# print(x.size())
def test_biquad_delay(self):
si, _ = torchaudio.info(self.test_filepath)
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(self.test_filepath)
E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437", "1.0", "-0.17123075", "0.17668821"])
E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437",
"1.0", "-0.17123075", "0.17668821"])
E.append_effect_to_chain("delay", ["15000s"])
x, _ = E.sox_build_flow_effects()
# check if effect worked
......
......@@ -38,9 +38,11 @@ class Tester(unittest.TestCase):
length_new = int(length_orig * 1.2)
result = transforms.PadTrim(max_len=length_new, channels_first=False)(audio_orig)
self.assertEqual(result.size(0), length_new)
result = transforms.PadTrim(max_len=length_new, channels_first=True)(audio_orig.transpose(0, 1))
self.assertEqual(result.size(1), length_new)
audio_orig = self.sig.clone()
length_orig = audio_orig.size(0)
length_new = int(length_orig * 0.8)
......@@ -147,7 +149,7 @@ class Tester(unittest.TestCase):
audio_orig = self.sig.clone() # (16000, 1)
audio_scaled = transforms.Scale()(audio_orig) # (16000, 1)
audio_scaled = transforms.LC2CL()(audio_scaled) # (1, 16000)
spectrogram_torch = transforms.MEL2()(audio_scaled) # (1, 319, 40)
spectrogram_torch = transforms.MEL2(window_fn=torch.hamming_window, pad=10)(audio_scaled) # (1, 319, 40)
self.assertTrue(spectrogram_torch.dim() == 3)
self.assertTrue(spectrogram_torch.max() <= 0.)
......
......@@ -44,7 +44,8 @@ def load(filepath,
filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
Returns: tuple(Tensor, int)
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
C is the number of channels
- int: the sample rate of the audio (as listed in the metadata of the file)
Example::
......@@ -127,8 +128,7 @@ def save_encinfo(filepath,
>>> torchaudio.save('foo.wav', data, sample_rate)
"""
ch_idx = 0 if channels_first else 1
len_idx = 1 if channels_first else 0
ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
# check if save directory exists
abs_dirpath = os.path.dirname(os.path.abspath(filepath))
......
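A usage sketch matching the load/save docstrings above; 'foo.wav' is the hypothetical file from the save_encinfo example, and the returned shape depends on channels_first.

import torchaudio

waveform, sample_rate = torchaudio.load('foo.wav')      # Tensor of size [C x L] or [L x C], plus the sample rate
torchaudio.save('foo_copy.wav', waveform, sample_rate)  # writes back out through save_encinfo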
......@@ -44,7 +44,8 @@ class SoxEffectsChain(object):
filetype (str, optional): a filetype or extension to be set if sox cannot determine it automatically
Returns: tuple(Tensor, int)
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels
- Tensor: output Tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and
C is the number of channels
- int: the sample rate of the audio (as listed in the metadata of the file)
Example::
......
......@@ -158,7 +158,7 @@ int read_audio_file(
void write_audio_file(
const std::string& file_name,
at::Tensor tensor,
const at::Tensor& tensor,
sox_signalinfo_t* si,
sox_encodinginfo_t* ei,
const char* file_type) {
......@@ -332,16 +332,9 @@ int build_flow_effects(const std::string& file_name,
int sr;
// Read the in-memory audio buffer or temp file that we just wrote.
#ifdef __APPLE__
/* certain effects will result in a target signal length of 0.
if (target_signal->length > 0) {
if (target_signal->channels != output->signal.channels) {
std::cout << "output: " << output->signal.channels << "|" << output->signal.length << "\n";
std::cout << "interm: " << interm_signal.channels << "|" << interm_signal.length << "\n";
std::cout << "target: " << target_signal->channels << "|" << target_signal->length << "\n";
unlink(tmp_name);
throw std::runtime_error("unexpected number of audio channels");
}
}
/*
Temporary filetype must have a valid header. Wav seems to work here while
raw does not. Certain effects like chorus caused strange behavior on the mac.
*/
// read_audio_file reads the temporary file and returns the sr and otensor
sr = read_audio_file(tmp_name, otensor, ch_first, 0, 0,
......
......@@ -26,10 +26,10 @@ int read_audio_file(
/// writing, or an error occurred during writing of the audio data.
void write_audio_file(
const std::string& file_name,
at::Tensor tensor,
at::Tensor& tensor,
sox_signalinfo_t* si,
sox_encodinginfo_t* ei,
const char* extension)
const char* file_type)
/// Reads an audio file from the given `path` and returns a tuple of
/// sox_signalinfo_t and sox_encodinginfo_t, which contain information about
......@@ -46,6 +46,13 @@ std::vector<std::string> get_effect_names();
int initialize_sox();
int shutdown_sox();
// Struct for build_flow_effects function
struct SoxEffect {
SoxEffect() : ename(""), eopts({""}) { }
std::string ename;
std::vector<std::string> eopts;
};
/// Build a SoX chain, flow the effects, and capture the results in a tensor.
/// An audio file from the given `path` flows through an effects chain given
/// by a list of effects and effect options to an output buffer which is encoded
......
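On the Python side, each (ename, eopts) pair carried by the SoxEffect struct above corresponds to one append_effect_to_chain call before build_flow_effects runs. A sketch mirroring the biquad/delay test earlier in this commit; the input path is hypothetical.

import torchaudio

E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file("assets/sinewave.wav")  # hypothetical input path
E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437",
                                    "1.0", "-0.17123075", "0.17668821"])
E.append_effect_to_chain("delay", ["15000s"])
x, sr = E.sox_build_flow_effects()       # tensor of samples and the sample rate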
from __future__ import division, print_function
import torch
from torch.autograd import Variable
import numpy as np
try:
import librosa
......@@ -8,18 +7,6 @@ except ImportError:
librosa = None
def _check_is_variable(tensor):
if isinstance(tensor, torch.Tensor):
is_variable = False
tensor = Variable(tensor, requires_grad=False)
elif isinstance(tensor, Variable):
is_variable = True
else:
raise TypeError("tensor should be a Variable or Tensor, but is {}".format(type(tensor)))
return tensor, is_variable
class Compose(object):
"""Composes several transforms together.
......@@ -73,8 +60,8 @@ class Scale(object):
Tensor: Scaled by the scale factor. (default between -1.0 and 1.0)
"""
if isinstance(tensor, (torch.LongTensor, torch.IntTensor)):
tensor = tensor.float()
if not tensor.is_floating_point():
tensor = tensor.to(torch.float32)
return tensor / self.factor
......@@ -101,18 +88,18 @@ class PadTrim(object):
"""
Returns:
Tensor: (c x Ln or (n x c)
Tensor: (c x n) or (n x c)
"""
assert tensor.size(self.ch_dim) < 128, \
"Too many channels ({}) detected, look at channels_first param.".format(tensor.size(self.ch_dim))
"Too many channels ({}) detected, see channels_first param.".format(tensor.size(self.ch_dim))
if self.max_len > tensor.size(self.len_dim):
padding_size = [self.max_len - tensor.size(self.len_dim) if i == self.len_dim
else tensor.size(self.ch_dim)
for i in range(2)]
pad = torch.empty(padding_size, dtype=tensor.dtype).fill_(self.fill_value)
tensor = torch.cat((tensor, pad), dim=self.len_dim)
padding = [self.max_len - tensor.size(self.len_dim)
if (i % 2 == 1) and (i // 2 != self.len_dim)
else 0
for i in range(4)]
with torch.no_grad():
tensor = torch.nn.functional.pad(tensor, padding, "constant", self.fill_value)
elif self.max_len < tensor.size(self.len_dim):
tensor = tensor.narrow(self.len_dim, 0, self.max_len)
return tensor
......@@ -138,8 +125,8 @@ class DownmixMono(object):
self.ch_dim = int(not channels_first)
def __call__(self, tensor):
if isinstance(tensor, (torch.LongTensor, torch.IntTensor)):
tensor = tensor.float()
if not tensor.is_floating_point():
tensor = tensor.to(torch.float32)
tensor = torch.mean(tensor, self.ch_dim, True)
return tensor
......@@ -182,12 +169,8 @@ class SPECTROGRAM(object):
"""
def __init__(self, sr=16000, ws=400, hop=None, n_fft=None,
pad=0, window=torch.hann_window, wkwargs=None):
if isinstance(window, Variable):
self.window = window
else:
self.window = window(ws) if wkwargs is None else window(ws, **wkwargs)
self.window = Variable(self.window, volatile=True)
pad=0, window_fn=torch.hann_window, wkwargs=None):
self.window = window_fn(ws) if wkwargs is None else window_fn(ws, **wkwargs)
self.sr = sr
self.ws = ws
self.hop = hop if hop is not None else ws // 2
......@@ -200,33 +183,27 @@ class SPECTROGRAM(object):
def __call__(self, sig):
"""
Args:
sig (Tensor or Variable): Tensor of audio of size (c, n)
sig (Tensor): Tensor of audio of size (c, n)
Returns:
spec_f (Tensor or Variable): channels x hops x n_fft (c, l, f), where channels
spec_f (Tensor): channels x hops x n_fft (c, l, f), where channels
is unchanged, hops is the number of hops, and n_fft is the
number of fourier bins, which should be the window size divided
by 2 plus 1.
"""
sig, is_variable = _check_is_variable(sig)
assert sig.dim() == 2
if self.pad > 0:
c, n = sig.size()
new_sig = sig.new_empty(c, n + self.pad * 2)
new_sig[:, :self.pad].zero_()
new_sig[:, -self.pad:].zero_()
new_sig.narrow(1, self.pad, n).copy_(sig)
sig = new_sig
with torch.no_grad():
sig = torch.nn.functional.pad(sig, (self.pad, self.pad), "constant")
spec_f = torch.stft(sig, self.n_fft, self.hop, self.ws,
self.window, center=False,
normalized=True, onesided=True).transpose(1, 2)
spec_f /= self.window.pow(2).sum().sqrt()
spec_f = spec_f.pow(2).sum(-1) # get power of "complex" tensor (c, l, n_fft)
return spec_f if is_variable else spec_f.data
return spec_f
class F2M(object):
......@@ -247,7 +224,6 @@ class F2M(object):
def __call__(self, spec_f):
spec_f, is_variable = _check_is_variable(spec_f)
n_fft = spec_f.size(2)
m_min = 0. if self.f_min == 0 else 2595 * np.log10(1. + (self.f_min / 700))
......@@ -269,9 +245,8 @@ class F2M(object):
if f_m != f_m_plus:
fb[f_m:f_m_plus, m - 1] = (f_m_plus - torch.arange(f_m, f_m_plus)) / (f_m_plus - f_m)
fb = Variable(fb)
spec_m = torch.matmul(spec_f, fb) # (c, l, n_fft) dot (n_fft, n_mels) -> (c, l, n_mels)
return spec_m if is_variable else spec_m.data
return spec_m
class SPEC2DB(object):
......@@ -290,11 +265,10 @@ class SPEC2DB(object):
def __call__(self, spec):
spec, is_variable = _check_is_variable(spec)
spec_db = self.multiplier * torch.log10(spec / spec.max()) # power -> dB
if self.top_db is not None:
spec_db = torch.max(spec_db, spec_db.new([self.top_db]))
return spec_db if is_variable else spec_db.data
return spec_db
class MEL2(object):
......@@ -322,9 +296,8 @@ class MEL2(object):
>>> spec_mel = transforms.MEL2(sr)(sig) # (c, l, m)
"""
def __init__(self, sr=16000, ws=400, hop=None, n_fft=None,
pad=0, n_mels=40, window=torch.hann_window, wkwargs=None):
self.window = window(ws) if wkwargs is None else window(ws, **wkwargs)
self.window = Variable(self.window, requires_grad=False)
pad=0, n_mels=40, window_fn=torch.hann_window, wkwargs=None):
self.window_fn = window_fn
self.sr = sr
self.ws = ws
self.hop = hop if hop is not None else ws // 2
......@@ -348,18 +321,16 @@ class MEL2(object):
"""
sig, is_variable = _check_is_variable(sig)
transforms = Compose([
SPECTROGRAM(self.sr, self.ws, self.hop, self.n_fft,
self.pad, self.window),
self.pad, self.window_fn, self.wkwargs),
F2M(self.n_mels, self.sr, self.f_max, self.f_min),
SPEC2DB("power", self.top_db),
])
spec_mel_db = transforms(sig)
return spec_mel_db if is_variable else spec_mel_db.data
return spec_mel_db
class MEL(object):
......@@ -454,10 +425,10 @@ class MuLawEncoding(object):
if isinstance(x, np.ndarray):
x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
x_mu = ((x_mu + 1) / 2 * mu + 0.5).astype(int)
elif isinstance(x, (torch.Tensor, torch.LongTensor)):
if isinstance(x, torch.LongTensor):
x = x.float()
mu = torch.FloatTensor([mu])
elif isinstance(x, torch.Tensor):
if not x.is_floating_point():
x = x.to(torch.float)
mu = torch.tensor(mu, dtype=x.dtype)
x_mu = torch.sign(x) * torch.log1p(mu *
torch.abs(x)) / torch.log1p(mu)
x_mu = ((x_mu + 1) / 2 * mu + 0.5).long()
......@@ -496,10 +467,10 @@ class MuLawExpanding(object):
if isinstance(x_mu, np.ndarray):
x = ((x_mu) / mu) * 2 - 1.
x = np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
elif isinstance(x_mu, (torch.Tensor, torch.LongTensor)):
if isinstance(x_mu, torch.LongTensor):
x_mu = x_mu.float()
mu = torch.FloatTensor([mu])
elif isinstance(x_mu, torch.Tensor):
if not x_mu.is_floating_point():
x_mu = x_mu.to(torch.float)
mu = torch.tensor(mu, dtype=x_mu.dtype)
x = ((x_mu) / mu) * 2 - 1.
x = torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
return x
......
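To illustrate the refactored transforms API above (window_fn instead of window, no Variable wrapping, dtype checks via is_floating_point), here is a sketch with illustrative shapes and parameter values; quantization_channels is assumed to be the MuLaw* constructor argument.

import torch
import torchaudio.transforms as transforms

waveform = torch.rand(1, 16000) * 2 - 1                  # stand-in (c x n) signal in [-1., 1.]

mel_db = transforms.MEL2(sr=16000, n_mels=40,
                         window_fn=torch.hamming_window, pad=10)(waveform)    # (c, l, n_mels) in dB

encoded = transforms.MuLawEncoding(quantization_channels=256)(waveform)       # integer codes
decoded = transforms.MuLawExpanding(quantization_channels=256)(encoded)       # approximate inverse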