Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
e95c8f5a
Commit
e95c8f5a
authored
Oct 02, 2018
by
David Pollack
Committed by
Soumith Chintala
Dec 25, 2018
Browse files
fixes for OSX, behavior still inconsistent
parent
301e2e98
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
30 deletions
+43
-30
test/test_sox_effects.py
test/test_sox_effects.py
+7
-7
torchaudio/torch_sox.cpp
torchaudio/torch_sox.cpp
+36
-23
No files found.
test/test_sox_effects.py
View file @
e95c8f5a
...
...
@@ -46,35 +46,35 @@ class Test_SoxEffectsChain(unittest.TestCase):
def
test_ulaw_and_siginfo
(
self
):
si_out
=
torchaudio
.
sox_signalinfo_t
()
ei_out
=
torchaudio
.
sox_encodinginfo_t
()
si_out
.
rate
=
16000
si_out
.
channels
=
1
si_out
.
precision
=
8
ei_out
.
encoding
=
torchaudio
.
get_sox_encoding_t
(
9
)
ei_out
.
bits_per_sample
=
8
si_in
,
ei_in
=
torchaudio
.
info
(
self
.
test_filepath
)
si_out
.
rate
=
44100
si_out
.
channels
=
2
E
=
torchaudio
.
sox_effects
.
SoxEffectsChain
(
out_siginfo
=
si_out
,
out_encinfo
=
ei_out
)
E
.
set_input_file
(
self
.
test_filepath
)
x
,
sr
=
E
.
sox_build_flow_effects
()
# Note: the sample rate is reported as "changed", but no downsampling occurred
# also the number of channels has not changed. Run rate and channels effects
# to make those changes. However, the output was encoded into ulaw because the
# Note: the output was encoded into ulaw because the
# number of unique values in the output is less than 256.
self
.
assertLess
(
x
.
unique
().
size
(
0
),
2
**
8
)
self
.
assertEqual
(
x
.
size
(
0
),
si_in
.
channels
)
self
.
assertEqual
(
sr
,
si_out
.
rate
)
self
.
assertEqual
(
x
.
numel
(),
si_in
.
length
)
def
test_band_chorus
(
self
):
si_in
,
ei_in
=
torchaudio
.
info
(
self
.
test_filepath
)
ei_in
.
encoding
=
torchaudio
.
get_sox_encoding_t
(
1
)
E
=
torchaudio
.
sox_effects
.
SoxEffectsChain
(
out_encinfo
=
ei_in
,
out_siginfo
=
si_in
)
E
.
set_input_file
(
self
.
test_filepath
)
E
.
append_effect_to_chain
(
"band"
,
[
"-n"
,
"10k"
,
"3.5k"
])
E
.
append_effect_to_chain
(
"chorus"
,
[.
5
,
.
7
,
55
,
0.4
,
.
25
,
2
,
'-s'
])
E
.
append_effect_to_chain
(
"rate"
,
[
si_in
.
rate
])
E
.
append_effect_to_chain
(
"channels"
,
[
si_in
.
channels
])
x
,
sr
=
E
.
sox_build_flow_effects
()
#print(x.size(), sr)
def
test_synth
(
self
):
si_in
,
ei_in
=
torchaudio
.
info
(
self
.
test_filepath
)
ei_in
.
encoding
=
torchaudio
.
get_sox_encoding_t
(
1
)
E
=
torchaudio
.
sox_effects
.
SoxEffectsChain
(
out_encinfo
=
ei_in
,
out_siginfo
=
si_in
)
E
.
set_input_file
(
self
.
test_filepath
)
E
.
append_effect_to_chain
(
"synth"
,
[
"1"
,
"pinknoise"
,
"mix"
])
...
...
torchaudio/torch_sox.cpp
View file @
e95c8f5a
...
...
@@ -259,17 +259,18 @@ int build_flow_effects(const std::string& file_name,
// create interm_signal for effects, intermediate steps change this in-place
sox_signalinfo_t
interm_signal
=
input
->
signal
;
// create buffer and buffer_size for output in memwrite
char
*
buffer
;
size_t
buffer_size
;
#ifdef __APPLE__
// According to Mozilla Deepspeech sox_open_memstream_write doesn't work
// with OSX
char
*
tmp_name
=
tmpnam
(
NULL
);
assert
(
tmp_name
);
sox_format_t
*
output
=
sox_open_write
(
tmp_name
,
&
target_signal
,
&
target_encoding
,
file_type
,
nullptr
,
nullptr
);
char
tmp_name
[]
=
"/tmp/fileXXXXXX"
;
int
tmp_fd
=
mkstemp
(
tmp_name
);
close
(
tmp_fd
);
sox_format_t
*
output
=
sox_open_write
(
tmp_name
,
target_signal
,
target_encoding
,
file_type
,
nullptr
,
nullptr
);
#else
// create buffer and buffer_size for output in memwrite
char
*
buffer
;
size_t
buffer_size
;
// in-memory descriptor (this may not work for OSX)
sox_format_t
*
output
=
sox_open_memstream_write
(
&
buffer
,
&
buffer_size
,
...
...
@@ -303,10 +304,13 @@ int build_flow_effects(const std::string& file_name,
sox_args
[
i
]
=
(
char
*
)
tae
.
eopts
[
i
].
c_str
();
}
if
(
sox_effect_options
(
e
,
num_opts
,
sox_args
)
!=
SOX_SUCCESS
)
{
#ifdef __APPLE__
unlink
(
tmp_name
);
#endif
throw
std
::
runtime_error
(
"invalid effect options, see SoX docs for details"
);
}
}
sox_add_effect
(
chain
,
e
,
&
interm_signal
,
&
in
put
->
signal
);
sox_add_effect
(
chain
,
e
,
&
interm_signal
,
&
out
put
->
signal
);
free
(
e
);
}
...
...
@@ -324,6 +328,24 @@ int build_flow_effects(const std::string& file_name,
sox_close
(
output
);
sox_close
(
input
);
int
sr
;
// Read the in-memory audio buffer or temp file that we just wrote.
#ifdef __APPLE__
if
(
target_signal
->
length
>
0
)
{
if
(
target_signal
->
channels
!=
output
->
signal
.
channels
)
{
//std::cout << "output: " << output->signal.channels << "|" << output->signal.length << "\n";
//std::cout << "target: " << target_signal->channels << "|" << target_signal->length << "\n";
unlink
(
tmp_name
);
throw
std
::
runtime_error
(
"unexpected number of audio channels"
);
}
sr
=
read_audio_file
(
tmp_name
,
otensor
,
ch_first
,
0
,
0
,
&
output
->
signal
,
&
output
->
encoding
,
file_type
);
}
else
{
sr
=
read_audio_file
(
tmp_name
,
otensor
,
ch_first
,
0
,
0
,
target_signal
,
target_encoding
,
file_type
);
}
unlink
(
tmp_name
);
#else
// Resize output tensor to desired dimensions, different effects result in output->signal.length,
// interm_signal.length and buffer size being inconsistent with the result of the file output.
// We prioritize in the order: output->signal.length > interm_signal.length > buffer_size
...
...
@@ -341,14 +363,7 @@ int build_flow_effects(const std::string& file_name,
}
otensor
.
resize_
({
ns
/
nc
,
nc
});
otensor
=
otensor
.
contiguous
();
// Read the in-memory audio buffer or temp file that we just wrote.
#ifdef __APPLE__
buffer_size
=
(
size_t
)
ns
*
2
;
// sizeof(char)? dependent on bit precision?
input
=
sox_open_read
(
tmp_name
,
target_signal
,
target_encoding
,
file_type
);
#else
input
=
sox_open_mem_read
(
buffer
,
buffer_size
,
target_signal
,
target_encoding
,
file_type
);
#endif
std
::
vector
<
sox_sample_t
>
samples
(
buffer_size
);
const
int64_t
samples_read
=
sox_read
(
input
,
samples
.
data
(),
buffer_size
);
// buffer size is twice signal length, but half the buffer is empty so correct
...
...
@@ -358,19 +373,17 @@ int build_flow_effects(const std::string& file_name,
auto
*
data
=
otensor
.
data
<
scalar_t
>
();
std
::
copy
(
samples
.
begin
(),
samples
.
begin
()
+
samples_read
,
data
);
});
// free buffer and quit sox
sox_close
(
input
);
#ifdef __APPLE__
unlink
(
tmp_name
)
#endif
free
(
buffer
);
if
(
ch_first
)
{
otensor
.
transpose_
(
1
,
0
);
}
sr
=
target_signal
->
rate
;
// free buffer
free
(
buffer
);
#endif
return
(
int
)
target_signal
->
rate
;
return
sr
;
}
}
// namespace audio
}
// namespace torch
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment