Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
47d97e30
Unverified
Commit
47d97e30
authored
Jan 27, 2021
by
moto
Committed by
GitHub
Jan 27, 2021
Browse files
Fix load from file object for small files and shorter bytes (#1181)
parent
41c76a17
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
146 additions
and
61 deletions
+146
-61
test/torchaudio_unittest/sox_io_backend/load_test.py
test/torchaudio_unittest/sox_io_backend/load_test.py
+75
-0
torchaudio/csrc/sox/effects.cpp
torchaudio/csrc/sox/effects.cpp
+49
-35
torchaudio/csrc/sox/effects_chain.cpp
torchaudio/csrc/sox/effects_chain.cpp
+22
-23
torchaudio/csrc/sox/utils.cpp
torchaudio/csrc/sox/utils.cpp
+0
-3
No files found.
test/torchaudio_unittest/sox_io_backend/load_test.py
View file @
47d97e30
...
@@ -380,6 +380,19 @@ class TestLoadWithoutExtension(PytorchTestCase):
...
@@ -380,6 +380,19 @@ class TestLoadWithoutExtension(PytorchTestCase):
assert
sr
==
16000
assert
sr
==
16000
class
CloggedFileObj
:
def
__init__
(
self
,
fileobj
):
self
.
fileobj
=
fileobj
self
.
buffer
=
b
''
def
read
(
self
,
n
):
if
not
self
.
buffer
:
self
.
buffer
+=
self
.
fileobj
.
read
(
n
)
ret
=
self
.
buffer
[:
2
]
self
.
buffer
=
self
.
buffer
[
2
:]
return
ret
@
skipIfNoExtension
@
skipIfNoExtension
@
skipIfNoExec
(
'sox'
)
@
skipIfNoExec
(
'sox'
)
class
TestFileObject
(
TempDirMixin
,
PytorchTestCase
):
class
TestFileObject
(
TempDirMixin
,
PytorchTestCase
):
...
@@ -444,6 +457,68 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
...
@@ -444,6 +457,68 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
assert
sr
==
sample_rate
assert
sr
==
sample_rate
self
.
assertEqual
(
expected
,
found
)
self
.
assertEqual
(
expected
,
found
)
@
parameterized
.
expand
([
(
'wav'
,
None
),
(
'mp3'
,
128
),
(
'mp3'
,
320
),
(
'flac'
,
0
),
(
'flac'
,
5
),
(
'flac'
,
8
),
(
'vorbis'
,
-
1
),
(
'vorbis'
,
10
),
(
'amb'
,
None
),
])
def
test_bytesio_clogged
(
self
,
ext
,
compression
):
"""Loading audio via clogged file object returns the same result as via file path.
This test case validates the case where fileobject returns shorter bytes than requeted.
"""
sample_rate
=
16000
format_
=
ext
if
ext
in
[
'mp3'
]
else
None
path
=
self
.
get_temp_path
(
f
'test.
{
ext
}
'
)
sox_utils
.
gen_audio_file
(
path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
)
expected
,
_
=
sox_io_backend
.
load
(
path
)
with
open
(
path
,
'rb'
)
as
file_
:
fileobj
=
CloggedFileObj
(
io
.
BytesIO
(
file_
.
read
()))
found
,
sr
=
sox_io_backend
.
load
(
fileobj
,
format
=
format_
)
assert
sr
==
sample_rate
self
.
assertEqual
(
expected
,
found
)
@
parameterized
.
expand
([
(
'wav'
,
None
),
(
'mp3'
,
128
),
(
'mp3'
,
320
),
(
'flac'
,
0
),
(
'flac'
,
5
),
(
'flac'
,
8
),
(
'vorbis'
,
-
1
),
(
'vorbis'
,
10
),
(
'amb'
,
None
),
])
def
test_bytesio_tiny
(
self
,
ext
,
compression
):
"""Loading very small audio via file object returns the same result as via file path.
"""
sample_rate
=
16000
format_
=
ext
if
ext
in
[
'mp3'
]
else
None
path
=
self
.
get_temp_path
(
f
'test.
{
ext
}
'
)
sox_utils
.
gen_audio_file
(
path
,
sample_rate
,
num_channels
=
2
,
compression
=
compression
,
duration
=
1
/
1600
)
expected
,
_
=
sox_io_backend
.
load
(
path
)
with
open
(
path
,
'rb'
)
as
file_
:
fileobj
=
io
.
BytesIO
(
file_
.
read
())
found
,
sr
=
sox_io_backend
.
load
(
fileobj
,
format
=
format_
)
assert
sr
==
sample_rate
self
.
assertEqual
(
expected
,
found
)
@
parameterized
.
expand
([
@
parameterized
.
expand
([
(
'wav'
,
None
),
(
'wav'
,
None
),
(
'mp3'
,
128
),
(
'mp3'
,
128
),
...
...
torchaudio/csrc/sox/effects.cpp
View file @
47d97e30
...
@@ -137,51 +137,65 @@ c10::intrusive_ptr<TensorSignal> apply_effects_file(
...
@@ -137,51 +137,65 @@ c10::intrusive_ptr<TensorSignal> apply_effects_file(
#ifdef TORCH_API_INCLUDE_EXTENSION_H
#ifdef TORCH_API_INCLUDE_EXTENSION_H
// Streaming decoding over file-like object is tricky because libsox operates on
// FILE pointer. The folloing is what `sox` and `play` commands do
// - file input -> FILE pointer
// - URL input -> call wget in suprocess and pipe the data -> FILE pointer
// - stdin -> FILE pointer
//
// We want to, instead, fetch byte strings chunk by chunk, consume them, and
// discard.
//
// Here is the approach
// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
// chunk of byte string
// This will perform header-based format detection, if necessary, then fill
// the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen,
// which returns FILE* which points the buffer of the provided byte string.
// 2. Each time sox reads a chunk from the FILE*, we update the underlying
// buffer in a way that it
// starts with unseen data, and append the new data read from the given
// fileobj. This will trick libsox as if it keeps reading from the FILE*
// continuously.
// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_fileobj
(
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_fileobj
(
py
::
object
fileobj
,
py
::
object
fileobj
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
c10
::
optional
<
bool
>&
normalize
,
c10
::
optional
<
bool
>&
normalize
,
c10
::
optional
<
bool
>&
channels_first
,
c10
::
optional
<
bool
>&
channels_first
,
c10
::
optional
<
std
::
string
>&
format
)
{
c10
::
optional
<
std
::
string
>&
format
)
{
// Streaming decoding over file-like object is tricky because libsox operates
// Prepare the buffer used throughout the lifecycle of SoxEffectChain.
// on FILE pointer. The folloing is what `sox` and `play` commands do
// - file input -> FILE pointer
// - URL input -> call wget in suprocess and pipe the data -> FILE pointer
// - stdin -> FILE pointer
//
//
// We want to, instead, fetch byte strings chunk by chunk, consume them, and
// For certain format (such as FLAC), libsox keeps reading the content at
// discard.
// the initialization unless it reaches EOF even when the header is properly
// parsed. (Making buffer size 8192, which is way bigger than the header,
// resulted in libsox consuming all the buffer content at the time it opens
// the file.) Therefore buffer has to always contain valid data, except after
// EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we
// first check if there is enough data to fill the buffer. `read_fileobj`
// repeatedly calls `read` method until it receives the requested lenght of
// bytes or it reaches EOF. If we get bytes shorter than requested, that means
// the whole audio data are fetched.
//
//
// Here is the approach
// * This can be changed with `torchaudio.utils.sox_utils.set_buffer_size`.
// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
auto
capacity
=
// chunk of byte string
(
sox_get_globals
()
->
bufsiz
>
256
)
?
sox_get_globals
()
->
bufsiz
:
256
;
// This will perform header-based format detection, if necessary, then fill
std
::
string
buffer
(
capacity
,
'\0'
);
// the metadata of sox_format_t. Internally, sox_open_mem_read uses
auto
*
in_buf
=
const_cast
<
char
*>
(
buffer
.
data
());
// fmemopen, which returns FILE* which points the buffer of the provided
auto
num_read
=
read_fileobj
(
&
fileobj
,
capacity
,
in_buf
);
// byte string.
// If the file is shorter than 256, then libsox cannot read the header.
// 2. Each time sox reads a chunk from the FILE*, we update the underlying
auto
in_buffer_size
=
(
num_read
>
256
)
?
num_read
:
256
;
// buffer in a way that it
// starts with unseen data, and append the new data read from the given
// fileobj. This will trick libsox as if it keeps reading from the FILE*
// continuously.
// Prepare the buffer used throughout the lifecycle of SoxEffectChain.
// Using std::string and let it manage memory.
// 4096 is minimum size requried by auto_detect_format
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L40-L48
const
size_t
in_buffer_size
=
4096
;
std
::
string
in_buffer
(
in_buffer_size
,
'x'
);
auto
*
in_buf
=
const_cast
<
char
*>
(
in_buffer
.
data
());
// Fetch the header, and copy it to the buffer.
auto
header
=
static_cast
<
std
::
string
>
(
static_cast
<
py
::
bytes
>
(
fileobj
.
attr
(
"read"
)(
4096
)));
memcpy
(
static_cast
<
void
*>
(
in_buf
),
static_cast
<
void
*>
(
const_cast
<
char
*>
(
header
.
data
())),
header
.
length
());
// Open file (this starts reading the header)
// Open file (this starts reading the header)
// When opening a file there are two functions that can touches FILE*.
// * `auto_detect_format`
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
// * `startread` handler of detected format.
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
// To see the handler of a particular format, go to
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
// For example, voribs can be found
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
SoxFormat
sf
(
sox_open_mem_read
(
SoxFormat
sf
(
sox_open_mem_read
(
in_buf
,
in_buf
,
in_buffer_size
,
in_buffer_size
,
...
...
torchaudio/csrc/sox/effects_chain.cpp
View file @
47d97e30
...
@@ -296,6 +296,7 @@ namespace {
...
@@ -296,6 +296,7 @@ namespace {
struct
FileObjInputPriv
{
struct
FileObjInputPriv
{
sox_format_t
*
sf
;
sox_format_t
*
sf
;
py
::
object
*
fileobj
;
py
::
object
*
fileobj
;
bool
eof_reached
;
char
*
buffer
;
char
*
buffer
;
uint64_t
buffer_size
;
uint64_t
buffer_size
;
};
};
...
@@ -312,9 +313,7 @@ struct FileObjOutputPriv {
...
@@ -312,9 +313,7 @@ struct FileObjOutputPriv {
int
fileobj_input_drain
(
sox_effect_t
*
effp
,
sox_sample_t
*
obuf
,
size_t
*
osamp
)
{
int
fileobj_input_drain
(
sox_effect_t
*
effp
,
sox_sample_t
*
obuf
,
size_t
*
osamp
)
{
auto
priv
=
static_cast
<
FileObjInputPriv
*>
(
effp
->
priv
);
auto
priv
=
static_cast
<
FileObjInputPriv
*>
(
effp
->
priv
);
auto
sf
=
priv
->
sf
;
auto
sf
=
priv
->
sf
;
auto
fileobj
=
priv
->
fileobj
;
auto
buffer
=
priv
->
buffer
;
auto
buffer
=
priv
->
buffer
;
auto
buffer_size
=
priv
->
buffer_size
;
// 1. Refresh the buffer
// 1. Refresh the buffer
//
//
...
@@ -326,32 +325,30 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
...
@@ -326,32 +325,30 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
//
//
// Before:
// Before:
//
//
// |<-------
-
consumed------
-
>|<-remaining->|
// |<-------consumed------>|<-
--
remaining-
--
>|
// |***********************
**|
-------------|
// |***********************
|----
-------------|
//
^ ftell
// ^ ftell
//
//
// After:
// After:
//
//
// |<-offset->|<-remaining->|<-
-
new data-
-
>|
// |<-offset->|<-
--
remaining-
--
>|<-new data->|
// |**********|-------------
|++
++++++++++++|
// |**********|-------------
----|
++++++++++++|
// ^ ftell
// ^ ftell
const
auto
num_consumed
=
sf
->
tell_off
;
const
auto
num_consumed
=
sf
->
tell_off
;
const
auto
num_remain
=
buffer_size
-
num_consumed
;
const
auto
num_remain
=
priv
->
buffer_size
-
num_consumed
;
// 1.1. First, we fetch the data to see if there is data to fill the buffer
// 1.1. Fetch the data to see if there is data to fill the buffer
py
::
bytes
chunk_
=
fileobj
->
attr
(
"read"
)(
num_consumed
);
uint64_t
num_refill
=
0
;
const
auto
num_refill
=
py
::
len
(
chunk_
);
std
::
string
chunk
(
num_consumed
,
'\0'
);
const
auto
offset
=
buffer_size
-
(
num_remain
+
num_refill
);
if
(
num_consumed
&&
!
priv
->
eof_reached
)
{
num_refill
=
read_fileobj
(
if
(
num_refill
>
num_consumed
)
{
priv
->
fileobj
,
num_consumed
,
const_cast
<
char
*>
(
chunk
.
data
()));
std
::
ostringstream
message
;
if
(
num_refill
<
num_consumed
)
{
message
priv
->
eof_reached
=
true
;
<<
"Tried to read up to "
<<
num_consumed
<<
" bytes but, "
}
<<
"recieved "
<<
num_refill
<<
" bytes. "
<<
"The given object does not confirm to read protocol of file object."
;
throw
std
::
runtime_error
(
message
.
str
());
}
}
const
auto
offset
=
num_consumed
-
num_refill
;
// 1.2. Move the unconsumed data towards the beginning of buffer.
// 1.2. Move the unconsumed data towards the beginning of buffer.
if
(
num_remain
)
{
if
(
num_remain
)
{
...
@@ -362,7 +359,6 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
...
@@ -362,7 +359,6 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
// 1.3. Refill the remaining buffer.
// 1.3. Refill the remaining buffer.
if
(
num_refill
)
{
if
(
num_refill
)
{
auto
chunk
=
static_cast
<
std
::
string
>
(
chunk_
);
auto
src
=
static_cast
<
void
*>
(
const_cast
<
char
*>
(
chunk
.
c_str
()));
auto
src
=
static_cast
<
void
*>
(
const_cast
<
char
*>
(
chunk
.
c_str
()));
auto
dst
=
buffer
+
offset
+
num_remain
;
auto
dst
=
buffer
+
offset
+
num_remain
;
memcpy
(
dst
,
src
,
num_refill
);
memcpy
(
dst
,
src
,
num_refill
);
...
@@ -383,7 +379,9 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
...
@@ -383,7 +379,9 @@ int fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
// store the actual number read back to *osamp
// store the actual number read back to *osamp
*
osamp
=
sox_read
(
sf
,
obuf
,
*
osamp
);
*
osamp
=
sox_read
(
sf
,
obuf
,
*
osamp
);
return
*
osamp
?
SOX_SUCCESS
:
SOX_EOF
;
// Decoding is finished when fileobject is exhausted and sox can no longer
// decode a sample.
return
(
priv
->
eof_reached
&&
!*
osamp
)
?
SOX_EOF
:
SOX_SUCCESS
;
}
}
int
fileobj_output_flow
(
int
fileobj_output_flow
(
...
@@ -469,6 +467,7 @@ void SoxEffectsChain::addInputFileObj(
...
@@ -469,6 +467,7 @@ void SoxEffectsChain::addInputFileObj(
auto
priv
=
static_cast
<
FileObjInputPriv
*>
(
e
->
priv
);
auto
priv
=
static_cast
<
FileObjInputPriv
*>
(
e
->
priv
);
priv
->
sf
=
sf
;
priv
->
sf
=
sf
;
priv
->
fileobj
=
fileobj
;
priv
->
fileobj
=
fileobj
;
priv
->
eof_reached
=
false
;
priv
->
buffer
=
buffer
;
priv
->
buffer
=
buffer
;
priv
->
buffer_size
=
buffer_size
;
priv
->
buffer_size
=
buffer_size
;
if
(
sox_add_effect
(
sec_
,
e
,
&
interm_sig_
,
&
in_sig_
)
!=
SOX_SUCCESS
)
{
if
(
sox_add_effect
(
sec_
,
e
,
&
interm_sig_
,
&
in_sig_
)
!=
SOX_SUCCESS
)
{
...
...
torchaudio/csrc/sox/utils.cpp
View file @
47d97e30
...
@@ -337,9 +337,6 @@ uint64_t read_fileobj(py::object* fileobj, const uint64_t size, char* buffer) {
...
@@ -337,9 +337,6 @@ uint64_t read_fileobj(py::object* fileobj, const uint64_t size, char* buffer) {
<<
"The given object does not confirm to read protocol of file object."
;
<<
"The given object does not confirm to read protocol of file object."
;
throw
std
::
runtime_error
(
message
.
str
());
throw
std
::
runtime_error
(
message
.
str
());
}
}
std
::
cerr
<<
"req: "
<<
request
<<
", fetched: "
<<
chunk_len
<<
std
::
endl
;
std
::
cerr
<<
"buffer: "
<<
(
void
*
)
buffer
<<
std
::
endl
;
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
buffer
+=
chunk_len
;
buffer
+=
chunk_len
;
num_read
+=
chunk_len
;
num_read
+=
chunk_len
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment