Commit 8647f903 authored by David Pollack, committed by Soumith Chintala
Browse files

modified tests for speed and tempo

parent a4df8caf
...@@ -57,7 +57,7 @@ class Test_SoxEffectsChain(unittest.TestCase): ...@@ -57,7 +57,7 @@ class Test_SoxEffectsChain(unittest.TestCase):
x, sr = E.sox_build_flow_effects() x, sr = E.sox_build_flow_effects()
# Note: the output was encoded into ulaw because the # Note: the output was encoded into ulaw because the
# number of unique values in the output is less than 256. # number of unique values in the output is less than 256.
self.assertLess(x.unique().size(0), 2**8) self.assertLess(x.unique().size(0), 2**8 + 1)
self.assertEqual(x.numel(), si_in.length) self.assertEqual(x.numel(), si_in.length)
def test_band_chorus(self): def test_band_chorus(self):
...@@ -110,7 +110,7 @@ class Test_SoxEffectsChain(unittest.TestCase): ...@@ -110,7 +110,7 @@ class Test_SoxEffectsChain(unittest.TestCase):
E.clear_chain() E.clear_chain()
self.assertLess(x.abs().max().item(), 1.) self.assertLess(x.abs().max().item(), 1.)
def test_tempo(self): def test_tempo_or_speed(self):
tempo = .8 tempo = .8
si, _ = torchaudio.info(self.test_filepath) si, _ = torchaudio.info(self.test_filepath)
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
...@@ -118,7 +118,30 @@ class Test_SoxEffectsChain(unittest.TestCase): ...@@ -118,7 +118,30 @@ class Test_SoxEffectsChain(unittest.TestCase):
E.append_effect_to_chain("tempo", ["-s", tempo]) E.append_effect_to_chain("tempo", ["-s", tempo])
x, sr = E.sox_build_flow_effects() x, sr = E.sox_build_flow_effects()
# check if effect worked # check if effect worked
self.assertEqual(x.size(1), int((si.length / si.channels) / tempo)) self.assertAlmostEqual(x.size(1), math.ceil((si.length / si.channels) / tempo), delta=1)
# tempo > 1
E.clear_chain()
tempo = 1.2
E.append_effect_to_chain("tempo", ["-s", tempo])
x, sr = E.sox_build_flow_effects()
# check if effect worked
self.assertAlmostEqual(x.size(1), math.ceil((si.length / si.channels) / tempo), delta=1)
# speed > 1
E.clear_chain()
speed = 1.2
E.append_effect_to_chain("speed", [speed])
E.append_effect_to_chain("rate", [si.rate])
x, sr = E.sox_build_flow_effects()
# check if effect worked
self.assertAlmostEqual(x.size(1), math.ceil((si.length / si.channels) / speed), delta=1)
# speed < 1
E.clear_chain()
speed = 0.8
E.append_effect_to_chain("speed", [speed])
E.append_effect_to_chain("rate", [si.rate])
x, sr = E.sox_build_flow_effects()
# check if effect worked
self.assertAlmostEqual(x.size(1), math.ceil((si.length / si.channels) / speed), delta=1)
def test_trim(self): def test_trim(self):
x_orig, _ = torchaudio.load(self.test_filepath) x_orig, _ = torchaudio.load(self.test_filepath)
......
...@@ -295,6 +295,7 @@ int build_flow_effects(const std::string& file_name, ...@@ -295,6 +295,7 @@ int build_flow_effects(const std::string& file_name,
for(SoxEffect tae : pyeffs) { for(SoxEffect tae : pyeffs) {
if(tae.ename == "no_effects") break; if(tae.ename == "no_effects") break;
e = sox_create_effect(sox_find_effect(tae.ename.c_str())); e = sox_create_effect(sox_find_effect(tae.ename.c_str()));
e->global_info->global_info->verbosity = 1;
if(tae.eopts[0] == "") { if(tae.eopts[0] == "") {
sox_effect_options(e, 0, nullptr); sox_effect_options(e, 0, nullptr);
} else { } else {
...@@ -354,6 +355,8 @@ int build_flow_effects(const std::string& file_name, ...@@ -354,6 +355,8 @@ int build_flow_effects(const std::string& file_name,
// Could be related to: https://sourceforge.net/p/sox/bugs/314/ // Could be related to: https://sourceforge.net/p/sox/bugs/314/
int nc, ns; int nc, ns;
if (output->signal.length == 0) { if (output->signal.length == 0) {
// sometimes interm_signal length is extremely large, but the buffer_size
// is double the length of the output signal
if (interm_signal.length > (buffer_size * 10)) { if (interm_signal.length > (buffer_size * 10)) {
ns = buffer_size / 2; ns = buffer_size / 2;
} else { } else {
...@@ -366,24 +369,24 @@ int build_flow_effects(const std::string& file_name, ...@@ -366,24 +369,24 @@ int build_flow_effects(const std::string& file_name,
} }
otensor.resize_({ns/nc, nc}); otensor.resize_({ns/nc, nc});
otensor = otensor.contiguous(); otensor = otensor.contiguous();
input = sox_open_mem_read(buffer, buffer_size, target_signal, target_encoding, file_type); input = sox_open_mem_read(buffer, buffer_size, target_signal, target_encoding, file_type);
std::vector<sox_sample_t> samples(buffer_size); std::vector<sox_sample_t> samples(buffer_size);
const int64_t samples_read = sox_read(input, samples.data(), buffer_size); const int64_t samples_read = sox_read(input, samples.data(), buffer_size);
// buffer size is twice signal length, but half the buffer is empty so correct
// number of samples should be read
assert(samples_read != nc * ns && samples_read != 0); assert(samples_read != nc * ns && samples_read != 0);
AT_DISPATCH_ALL_TYPES(otensor.type(), "effects_buffer", [&] { AT_DISPATCH_ALL_TYPES(otensor.type(), "effects_buffer", [&] {
auto* data = otensor.data<scalar_t>(); auto* data = otensor.data<scalar_t>();
std::copy(samples.begin(), samples.begin() + samples_read, data); std::copy(samples.begin(), samples.begin() + samples_read, data);
}); });
// free buffer and close mem_read
sox_close(input); sox_close(input);
free(buffer);
if (ch_first) { if (ch_first) {
otensor.transpose_(1, 0); otensor.transpose_(1, 0);
} }
sr = target_signal->rate; sr = target_signal->rate;
// free buffer
free(buffer);
#endif #endif
// return sample rate, output tensor modified in-place // return sample rate, output tensor modified in-place
return sr; return sr;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment