Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
0406d30d
Unverified
Commit
0406d30d
authored
Jul 22, 2020
by
moto
Committed by
GitHub
Jul 22, 2020
Browse files
Replace sox_io save/load with sox effects chain in C++ (#779)
* Replace save/load function with sox effects chain
parent
0812f22a
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
75 additions
and
74 deletions
+75
-74
torchaudio/csrc/sox_effects_chain.cpp
torchaudio/csrc/sox_effects_chain.cpp
+52
-1
torchaudio/csrc/sox_effects_chain.h
torchaudio/csrc/sox_effects_chain.h
+2
-0
torchaudio/csrc/sox_io.cpp
torchaudio/csrc/sox_io.cpp
+21
-73
No files found.
torchaudio/csrc/sox_effects_chain.cpp
View file @
0406d30d
...
@@ -46,6 +46,9 @@ struct TensorInputPriv {
...
@@ -46,6 +46,9 @@ struct TensorInputPriv {
struct
TensorOutputPriv
{
struct
TensorOutputPriv
{
std
::
vector
<
sox_sample_t
>*
buffer
;
std
::
vector
<
sox_sample_t
>*
buffer
;
};
};
/// Private state of the "output_file" effect.
///
/// `sf` is the sox file handle that `file_output_flow` writes samples to;
/// it is assigned by `SoxEffectsChain::addOutputFile`. Nothing in this
/// struct closes the handle.
struct FileOutputPriv {
  sox_format_t* sf;
};
/// Callback function to feed Tensor data to SoxEffectChain.
/// Callback function to feed Tensor data to SoxEffectChain.
int
tensor_input_drain
(
sox_effect_t
*
effp
,
sox_sample_t
*
obuf
,
size_t
*
osamp
)
{
int
tensor_input_drain
(
sox_effect_t
*
effp
,
sox_sample_t
*
obuf
,
size_t
*
osamp
)
{
...
@@ -84,7 +87,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
...
@@ -84,7 +87,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
/// Callback function to fetch data from SoxEffectChain.
/// Callback function to fetch data from SoxEffectChain.
int
tensor_output_flow
(
int
tensor_output_flow
(
sox_effect_t
*
effp
LSX_UNUSED
,
sox_effect_t
*
effp
,
sox_sample_t
const
*
ibuf
,
sox_sample_t
const
*
ibuf
,
sox_sample_t
*
obuf
LSX_UNUSED
,
sox_sample_t
*
obuf
LSX_UNUSED
,
size_t
*
isamp
,
size_t
*
isamp
,
...
@@ -97,6 +100,28 @@ int tensor_output_flow(
...
@@ -97,6 +100,28 @@ int tensor_output_flow(
return
SOX_SUCCESS
;
return
SOX_SUCCESS
;
}
}
/// Callback function to write the samples coming out of SoxEffectChain to a
/// file.
///
/// @param effp  Effect instance; its `priv` is read as a `FileOutputPriv`.
/// @param ibuf  Samples handed over by the previous effect in the chain.
/// @param obuf  Unused: this effect produces no output samples.
/// @param isamp Number of samples available in `ibuf`.
/// @param osamp Always set to 0.
/// @return `SOX_SUCCESS` when there was nothing to write or every sample was
///         written; `SOX_EOF` when the write came up short and the file
///         handle reports no error code.
/// @throws std::runtime_error when the write came up short and the file
///         handle carries a non-zero `sox_errno`.
int file_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) {
  // This effect only consumes samples; it never emits any downstream.
  *osamp = 0;

  if (!*isamp) {
    return SOX_SUCCESS;
  }

  auto out_file = static_cast<FileOutputPriv*>(effp->priv)->sf;
  if (sox_write(out_file, ibuf, *isamp) == *isamp) {
    return SOX_SUCCESS;
  }

  // Short write: without an error code on the handle, report EOF.
  if (!out_file->sox_errno) {
    return SOX_EOF;
  }

  std::ostringstream message;
  message << out_file->sox_errstr << " " << sox_strerror(out_file->sox_errno)
          << " " << out_file->filename;
  throw std::runtime_error(message.str());
}
sox_effect_handler_t
*
get_tensor_input_handler
()
{
sox_effect_handler_t
*
get_tensor_input_handler
()
{
static
sox_effect_handler_t
handler
{
/*name=*/
"input_tensor"
,
static
sox_effect_handler_t
handler
{
/*name=*/
"input_tensor"
,
/*usage=*/
NULL
,
/*usage=*/
NULL
,
...
@@ -125,6 +150,20 @@ sox_effect_handler_t* get_tensor_output_handler() {
...
@@ -125,6 +150,20 @@ sox_effect_handler_t* get_tensor_output_handler() {
return
&
handler
;
return
&
handler
;
}
}
/// Returns the handler table for the "output_file" effect.
///
/// The table is a function-local static, so every call returns the same
/// pointer. Only the `flow` callback is provided (`file_output_flow`); all
/// other callbacks are left unset. `priv_size` is `sizeof(FileOutputPriv)`.
sox_effect_handler_t* get_file_output_handler() {
  static sox_effect_handler_t handler{
      /*name=*/"output_file",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/file_output_flow,
      /*drain=*/nullptr,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(FileOutputPriv)};
  return &handler;
}
}
// namespace
}
// namespace
SoxEffectsChain
::
SoxEffectsChain
(
SoxEffectsChain
::
SoxEffectsChain
(
...
@@ -134,6 +173,7 @@ SoxEffectsChain::SoxEffectsChain(
...
@@ -134,6 +173,7 @@ SoxEffectsChain::SoxEffectsChain(
out_enc_
(
output_encoding
),
out_enc_
(
output_encoding
),
in_sig_
(),
in_sig_
(),
interm_sig_
(),
interm_sig_
(),
out_sig_
(),
sec_
(
sox_create_effects_chain
(
&
in_enc_
,
&
out_enc_
))
{
sec_
(
sox_create_effects_chain
(
&
in_enc_
,
&
out_enc_
))
{
if
(
!
sec_
)
{
if
(
!
sec_
)
{
throw
std
::
runtime_error
(
"Failed to create effect chain."
);
throw
std
::
runtime_error
(
"Failed to create effect chain."
);
...
@@ -184,6 +224,17 @@ void SoxEffectsChain::addInputFile(sox_format_t* sf) {
...
@@ -184,6 +224,17 @@ void SoxEffectsChain::addInputFile(sox_format_t* sf) {
}
}
}
}
/// Appends the "output_file" effect to the chain so that processed samples
/// are written to `sf`.
///
/// Copies `sf->signal` into `out_sig_`, stores `sf` in the new effect's
/// private data, and registers the effect with `interm_sig_` as its input
/// signal and `out_sig_` as its output signal.
///
/// @param sf Output file handle; dereferenced here and kept by the effect.
/// @throws std::runtime_error if `sox_add_effect` does not return
///         `SOX_SUCCESS`.
void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
  out_sig_ = sf->signal;

  SoxEffect sink(sox_create_effect(get_file_output_handler()));
  auto* sink_state = static_cast<FileOutputPriv*>(sink->priv);
  sink_state->sf = sf;

  const auto status = sox_add_effect(sec_, sink, &interm_sig_, &out_sig_);
  if (status == SOX_SUCCESS) {
    return;
  }

  std::ostringstream message;
  message << "Failed to add effect: output " << sf->filename;
  throw std::runtime_error(message.str());
}
void
SoxEffectsChain
::
addEffect
(
const
std
::
vector
<
std
::
string
>
effect
)
{
void
SoxEffectsChain
::
addEffect
(
const
std
::
vector
<
std
::
string
>
effect
)
{
const
auto
num_args
=
effect
.
size
();
const
auto
num_args
=
effect
.
size
();
if
(
num_args
==
0
)
{
if
(
num_args
==
0
)
{
...
...
torchaudio/csrc/sox_effects_chain.h
View file @
0406d30d
...
@@ -14,6 +14,7 @@ class SoxEffectsChain {
...
@@ -14,6 +14,7 @@ class SoxEffectsChain {
const
sox_encodinginfo_t
out_enc_
;
const
sox_encodinginfo_t
out_enc_
;
sox_signalinfo_t
in_sig_
;
sox_signalinfo_t
in_sig_
;
sox_signalinfo_t
interm_sig_
;
sox_signalinfo_t
interm_sig_
;
sox_signalinfo_t
out_sig_
;
sox_effects_chain_t
*
sec_
;
sox_effects_chain_t
*
sec_
;
public:
public:
...
@@ -29,6 +30,7 @@ class SoxEffectsChain {
...
@@ -29,6 +30,7 @@ class SoxEffectsChain {
void
addInputTensor
(
torchaudio
::
sox_utils
::
TensorSignal
*
signal
);
void
addInputTensor
(
torchaudio
::
sox_utils
::
TensorSignal
*
signal
);
void
addInputFile
(
sox_format_t
*
sf
);
void
addInputFile
(
sox_format_t
*
sf
);
void
addOutputBuffer
(
std
::
vector
<
sox_sample_t
>*
output_buffer
);
void
addOutputBuffer
(
std
::
vector
<
sox_sample_t
>*
output_buffer
);
void
addOutputFile
(
sox_format_t
*
sf
);
void
addEffect
(
const
std
::
vector
<
std
::
string
>
effect
);
void
addEffect
(
const
std
::
vector
<
std
::
string
>
effect
);
int64_t
getOutputNumChannels
();
int64_t
getOutputNumChannels
();
int64_t
getOutputSampleRate
();
int64_t
getOutputSampleRate
();
...
...
torchaudio/csrc/sox_io.cpp
View file @
0406d30d
#include <sox.h>
#include <sox.h>
#include <torchaudio/csrc/sox_effects.h>
#include <torchaudio/csrc/sox_effects_chain.h>
#include <torchaudio/csrc/sox_io.h>
#include <torchaudio/csrc/sox_io.h>
#include <torchaudio/csrc/sox_utils.h>
#include <torchaudio/csrc/sox_utils.h>
...
@@ -60,64 +62,21 @@ c10::intrusive_ptr<TensorSignal> load_audio_file(
...
@@ -60,64 +62,21 @@ c10::intrusive_ptr<TensorSignal> load_audio_file(
"Invalid argument: num_frames must be -1 or greater than 0."
);
"Invalid argument: num_frames must be -1 or greater than 0."
);
}
}
SoxFormat
sf
(
sox_open_read
(
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
;
path
.
c_str
(),
if
(
num_frames
!=
-
1
)
{
/*signal=*/
nullptr
,
std
::
ostringstream
offset
,
frames
;
/*encoding=*/
nullptr
,
offset
<<
frame_offset
<<
"s"
;
/*filetype=*/
nullptr
));
frames
<<
"+"
<<
num_frames
<<
"s"
;
effects
.
emplace_back
(
validate_input_file
(
sf
);
std
::
vector
<
std
::
string
>
{
"trim"
,
offset
.
str
(),
frames
.
str
()});
}
else
if
(
frame_offset
!=
0
)
{
const
int64_t
num_channels
=
sf
->
signal
.
channels
;
std
::
ostringstream
offset
;
const
int64_t
num_total_samples
=
sf
->
signal
.
length
;
offset
<<
frame_offset
<<
"s"
;
const
int64_t
sample_start
=
sf
->
signal
.
channels
*
frame_offset
;
effects
.
emplace_back
(
std
::
vector
<
std
::
string
>
{
"trim"
,
offset
.
str
()});
if
(
sox_seek
(
sf
,
sample_start
,
0
)
==
SOX_EOF
)
{
throw
std
::
runtime_error
(
"Error reading audio file: offset past EOF."
);
}
const
int64_t
sample_end
=
[
&
]()
{
if
(
num_frames
==
-
1
)
return
num_total_samples
;
const
int64_t
sample_end_
=
num_channels
*
num_frames
+
sample_start
;
if
(
num_total_samples
<
sample_end_
)
{
// For lossy encoding, it is difficult to predict exact size of buffer for
// reading the number of samples required.
// So we allocate buffer size of given `num_frames` and ask sox to read as
// much as possible. For lossless format, sox reads exact number of
// samples, but for lossy encoding, sox can end up reading less. (i.e.
// mp3) For the consistent behavior specification between lossy/lossless
// format, we allow users to provide `num_frames` value that exceeds #of
// available samples, and we adjust it here.
return
num_total_samples
;
}
}
return
sample_end_
;
}();
const
int64_t
max_samples
=
sample_end
-
sample_start
;
return
torchaudio
::
sox_effects
::
apply_effects_file
(
path
,
effects
,
normalize
,
channels_first
);
// Read samples into buffer
std
::
vector
<
sox_sample_t
>
buffer
;
buffer
.
reserve
(
max_samples
);
const
int64_t
num_samples
=
sox_read
(
sf
,
buffer
.
data
(),
max_samples
);
if
(
num_samples
==
0
)
{
throw
std
::
runtime_error
(
"Error reading audio file: empty file or read operation failed."
);
}
// NOTE: num_samples may be smaller than max_samples if the input
// format is compressed (i.e. mp3).
// Convert to Tensor
auto
tensor
=
convert_to_tensor
(
buffer
.
data
(),
num_samples
,
num_channels
,
get_dtype
(
sf
->
encoding
.
encoding
,
sf
->
signal
.
precision
),
normalize
,
channels_first
);
return
c10
::
make_intrusive
<
TensorSignal
>
(
tensor
,
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
channels_first
);
}
}
void
save_audio_file
(
void
save_audio_file
(
...
@@ -125,7 +84,6 @@ void save_audio_file(
...
@@ -125,7 +84,6 @@ void save_audio_file(
const
c10
::
intrusive_ptr
<
TensorSignal
>&
signal
,
const
c10
::
intrusive_ptr
<
TensorSignal
>&
signal
,
const
double
compression
)
{
const
double
compression
)
{
const
auto
tensor
=
signal
->
getTensor
();
const
auto
tensor
=
signal
->
getTensor
();
const
auto
channels_first
=
signal
->
getChannelsFirst
();
validate_input_tensor
(
tensor
);
validate_input_tensor
(
tensor
);
...
@@ -146,22 +104,12 @@ void save_audio_file(
...
@@ -146,22 +104,12 @@ void save_audio_file(
throw
std
::
runtime_error
(
"Error saving audio file: failed to open file."
);
throw
std
::
runtime_error
(
"Error saving audio file: failed to open file."
);
}
}
auto
tensor_
=
tensor
;
torchaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
if
(
channels_first
)
{
/*input_encoding=*/
get_encodinginfo
(
"wav"
,
tensor
.
dtype
(),
0.
),
tensor_
=
tensor_
.
t
();
/*output_encoding=*/
sf
->
encoding
);
}
chain
.
addInputTensor
(
signal
.
get
());
chain
.
addOutputFile
(
sf
);
const
int64_t
frames_per_chunk
=
65536
;
chain
.
run
();
for
(
int64_t
i
=
0
;
i
<
tensor_
.
size
(
0
);
i
+=
frames_per_chunk
)
{
auto
chunk
=
tensor_
.
index
({
Slice
(
i
,
i
+
frames_per_chunk
),
Slice
()});
chunk
=
unnormalize_wav
(
chunk
).
contiguous
();
const
size_t
numel
=
chunk
.
numel
();
if
(
sox_write
(
sf
,
chunk
.
data_ptr
<
int32_t
>
(),
numel
)
!=
numel
)
{
throw
std
::
runtime_error
(
"Error saving audio file: failed to write the entier buffer."
);
}
}
}
}
}
// namespace sox_io
}
// namespace sox_io
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment