Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
4406a6bb
Unverified
Commit
4406a6bb
authored
Dec 04, 2020
by
moto
Committed by
GitHub
Dec 04, 2020
Browse files
Add AMB/AMR-NB/AMR-WB support to "sox_io" backend (#1066)
parent
2a02d7f5
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
207 additions
and
13 deletions
+207
-13
build_tools/setup_helpers/extension.py
build_tools/setup_helpers/extension.py
+2
-0
test/torchaudio_unittest/sox_io_backend/info_test.py
test/torchaudio_unittest/sox_io_backend/info_test.py
+30
-0
test/torchaudio_unittest/sox_io_backend/load_test.py
test/torchaudio_unittest/sox_io_backend/load_test.py
+61
-0
test/torchaudio_unittest/sox_io_backend/save_test.py
test/torchaudio_unittest/sox_io_backend/save_test.py
+75
-0
third_party/CMakeLists.txt
third_party/CMakeLists.txt
+10
-2
torchaudio/backend/sox_io_backend.py
torchaudio/backend/sox_io_backend.py
+6
-4
torchaudio/csrc/sox_io.cpp
torchaudio/csrc/sox_io.cpp
+7
-1
torchaudio/csrc/sox_utils.cpp
torchaudio/csrc/sox_utils.cpp
+16
-6
No files found.
build_tools/setup_helpers/extension.py
View file @
4406a6bb
...
...
@@ -89,6 +89,8 @@ def _get_extra_objects():
'libvorbisfile.a'
,
'libvorbis.a'
,
'libogg.a'
,
'libopencore-amrnb.a'
,
'libopencore-amrwb.a'
,
]
for
lib
in
libs
:
objs
.
append
(
str
(
_TP_INSTALL_DIR
/
'lib'
/
lib
))
...
...
test/torchaudio_unittest/sox_io_backend/info_test.py
View file @
4406a6bb
...
...
@@ -122,6 +122,36 @@ class TestInfo(TempDirMixin, PytorchTestCase):
assert
info
.
num_frames
==
sample_rate
*
duration
assert
info
.
num_channels
==
num_channels
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
'float32'
,
'int32'
,
'int16'
,
'uint8'
],
[
8000
,
16000
],
[
1
,
2
],
)),
name_func
=
name_func
)
def
test_amb
(
self
,
dtype
,
sample_rate
,
num_channels
):
"""`sox_io_backend.info` can check amb file correctly"""
duration
=
1
path
=
self
.
get_temp_path
(
'data.amb'
)
sox_utils
.
gen_audio_file
(
path
,
sample_rate
,
num_channels
,
bit_depth
=
sox_utils
.
get_bit_depth
(
dtype
),
duration
=
duration
)
info
=
sox_io_backend
.
info
(
path
)
assert
info
.
sample_rate
==
sample_rate
assert
info
.
num_frames
==
sample_rate
*
duration
assert
info
.
num_channels
==
num_channels
def
test_amr_nb
(
self
):
"""`sox_io_backend.info` can check amr-nb file correctly"""
duration
=
1
num_channels
=
1
sample_rate
=
8000
path
=
self
.
get_temp_path
(
'data.amr-nb'
)
sox_utils
.
gen_audio_file
(
path
,
sample_rate
=
sample_rate
,
num_channels
=
num_channels
,
bit_depth
=
16
,
duration
=
duration
)
info
=
sox_io_backend
.
info
(
path
)
assert
info
.
sample_rate
==
sample_rate
assert
info
.
num_frames
==
sample_rate
*
duration
assert
info
.
num_channels
==
num_channels
@
skipIfNoExtension
class
TestInfoOpus
(
PytorchTestCase
):
...
...
test/torchaudio_unittest/sox_io_backend/load_test.py
View file @
4406a6bb
...
...
@@ -142,6 +142,53 @@ class LoadTestBase(TempDirMixin, PytorchTestCase):
assert
sr
==
sample_rate
self
.
assertEqual
(
data
,
data_ref
,
atol
=
4e-05
,
rtol
=
1.3e-06
)
def
assert_amb
(
self
,
dtype
,
sample_rate
,
num_channels
,
normalize
,
duration
):
"""`sox_io_backend.load` can load amb format.
This test takes the same strategy as mp3 to compare the result
"""
path
=
self
.
get_temp_path
(
'1.original.amb'
)
ref_path
=
self
.
get_temp_path
(
'2.reference.wav'
)
# 1. Generate amb with sox
sox_utils
.
gen_audio_file
(
path
,
sample_rate
,
num_channels
,
encoding
=
sox_utils
.
get_encoding
(
dtype
),
bit_depth
=
sox_utils
.
get_bit_depth
(
dtype
),
duration
=
duration
)
# 2. Convert to wav with sox
sox_utils
.
convert_audio_file
(
path
,
ref_path
)
# 3. Load amb with torchaudio
data
,
sr
=
sox_io_backend
.
load
(
path
,
normalize
=
normalize
)
# 4. Load wav with scipy
data_ref
=
load_wav
(
ref_path
,
normalize
=
normalize
)[
0
]
# 5. Compare
assert
sr
==
sample_rate
self
.
assertEqual
(
data
,
data_ref
,
atol
=
4e-05
,
rtol
=
1.3e-06
)
def
assert_amr_nb
(
self
,
duration
):
"""`sox_io_backend.load` can load amr-nb format.
This test takes the same strategy as mp3 to compare the result
"""
sample_rate
=
8000
num_channels
=
1
path
=
self
.
get_temp_path
(
'1.original.amr-nb'
)
ref_path
=
self
.
get_temp_path
(
'2.reference.wav'
)
# 1. Generate amr-nb with sox
sox_utils
.
gen_audio_file
(
path
,
sample_rate
,
num_channels
,
bit_depth
=
32
,
duration
=
duration
)
# 2. Convert to wav with sox
sox_utils
.
convert_audio_file
(
path
,
ref_path
)
# 3. Load amr-nb with torchaudio
data
,
sr
=
sox_io_backend
.
load
(
path
)
# 4. Load wav with scipy
data_ref
=
load_wav
(
ref_path
)[
0
]
# 5. Compare
assert
sr
==
sample_rate
self
.
assertEqual
(
data
,
data_ref
,
atol
=
4e-05
,
rtol
=
1.3e-06
)
@
skipIfNoExec
(
'sox'
)
@
skipIfNoExtension
...
...
@@ -260,6 +307,20 @@ class TestLoad(LoadTestBase):
"""`sox_io_backend.load` can load sph format correctly."""
self
.
assert_sphere
(
sample_rate
,
num_channels
,
duration
=
1
)
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
'float32'
,
'int32'
,
'int16'
],
[
8000
,
16000
],
[
1
,
2
],
[
False
,
True
],
)),
name_func
=
name_func
)
def
test_amb
(
self
,
dtype
,
sample_rate
,
num_channels
,
normalize
):
"""`sox_io_backend.load` can load sph format correctly."""
self
.
assert_amb
(
dtype
,
sample_rate
,
num_channels
,
normalize
,
duration
=
1
)
def
test_amr_nb
(
self
):
"""`sox_io_backend.load` can load amr_nb format correctly."""
self
.
assert_amr_nb
(
duration
=
1
)
@
skipIfNoExec
(
'sox'
)
@
skipIfNoExtension
...
...
test/torchaudio_unittest/sox_io_backend/save_test.py
View file @
4406a6bb
...
...
@@ -200,6 +200,68 @@ class SaveTestBase(TempDirMixin, PytorchTestCase):
self
.
assertEqual
(
found
,
expected
)
def
assert_amb
(
self
,
dtype
,
sample_rate
,
num_channels
,
duration
):
"""`sox_io_backend.save` can save amb format.
This test takes the same strategy as mp3 to compare the result
"""
src_path
=
self
.
get_temp_path
(
'1.reference.wav'
)
amb_path
=
self
.
get_temp_path
(
'2.1.torchaudio.amb'
)
wav_path
=
self
.
get_temp_path
(
'2.2.torchaudio.wav'
)
amb_path_sox
=
self
.
get_temp_path
(
'3.1.sox.amb'
)
wav_path_sox
=
self
.
get_temp_path
(
'3.2.sox.wav'
)
# 1. Generate original wav
data
=
get_wav_data
(
dtype
,
num_channels
,
normalize
=
False
,
num_frames
=
duration
*
sample_rate
)
save_wav
(
src_path
,
data
,
sample_rate
)
# 2.1. Convert the original wav to amb with torchaudio
sox_io_backend
.
save
(
amb_path
,
load_wav
(
src_path
,
normalize
=
False
)[
0
],
sample_rate
)
# 2.2. Convert the amb to wav with Sox
sox_utils
.
convert_audio_file
(
amb_path
,
wav_path
)
# 2.3. Load
found
=
load_wav
(
wav_path
)[
0
]
# 3.1. Convert the original wav to amb with SoX
sox_utils
.
convert_audio_file
(
src_path
,
amb_path_sox
)
# 3.2. Convert the amb to wav with Sox
sox_utils
.
convert_audio_file
(
amb_path_sox
,
wav_path_sox
)
# 3.3. Load
expected
=
load_wav
(
wav_path_sox
)[
0
]
self
.
assertEqual
(
found
,
expected
)
def
assert_amr_nb
(
self
,
duration
):
"""`sox_io_backend.save` can save amr_nb format.
This test takes the same strategy as mp3 to compare the result
"""
sample_rate
=
8000
num_channels
=
1
src_path
=
self
.
get_temp_path
(
'1.reference.wav'
)
amr_path
=
self
.
get_temp_path
(
'2.1.torchaudio.amr-nb'
)
wav_path
=
self
.
get_temp_path
(
'2.2.torchaudio.wav'
)
amr_path_sox
=
self
.
get_temp_path
(
'3.1.sox.amr-nb'
)
wav_path_sox
=
self
.
get_temp_path
(
'3.2.sox.wav'
)
# 1. Generate original wav
data
=
get_wav_data
(
'int16'
,
num_channels
,
normalize
=
False
,
num_frames
=
duration
*
sample_rate
)
save_wav
(
src_path
,
data
,
sample_rate
)
# 2.1. Convert the original wav to amr_nb with torchaudio
sox_io_backend
.
save
(
amr_path
,
load_wav
(
src_path
,
normalize
=
False
)[
0
],
sample_rate
)
# 2.2. Convert the amr_nb to wav with Sox
sox_utils
.
convert_audio_file
(
amr_path
,
wav_path
)
# 2.3. Load
found
=
load_wav
(
wav_path
)[
0
]
# 3.1. Convert the original wav to amr_nb with SoX
sox_utils
.
convert_audio_file
(
src_path
,
amr_path_sox
)
# 3.2. Convert the amr_nb to wav with Sox
sox_utils
.
convert_audio_file
(
amr_path_sox
,
wav_path_sox
)
# 3.3. Load
expected
=
load_wav
(
wav_path_sox
)[
0
]
self
.
assertEqual
(
found
,
expected
)
@
skipIfNoExec
(
'sox'
)
@
skipIfNoExtension
...
...
@@ -302,6 +364,19 @@ class TestSave(SaveTestBase):
"""`sox_io_backend.save` can save sph format."""
self
.
assert_sphere
(
sample_rate
,
num_channels
,
duration
=
1
)
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
'float32'
,
'int32'
,
'int16'
,
'uint8'
],
[
8000
,
16000
],
[
1
,
2
],
)),
name_func
=
name_func
)
def
test_amb
(
self
,
dtype
,
sample_rate
,
num_channels
):
"""`sox_io_backend.save` can save amb format."""
self
.
assert_amb
(
dtype
,
sample_rate
,
num_channels
,
duration
=
1
)
def
test_amr_nb
(
self
):
"""`sox_io_backend.save` can save amr-nb format."""
self
.
assert_amr_nb
(
duration
=
1
)
@
skipIfNoExec
(
'sox'
)
@
skipIfNoExtension
...
...
third_party/CMakeLists.txt
View file @
4406a6bb
...
...
@@ -16,6 +16,14 @@ ExternalProject_Add(libmad
CONFIGURE_COMMAND
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/libmad/configure
${
COMMON_ARGS
}
)
ExternalProject_Add
(
amr
PREFIX
${
CMAKE_CURRENT_SOURCE_DIR
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz
URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341
CONFIGURE_COMMAND
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/amr/configure
${
COMMON_ARGS
}
)
ExternalProject_Add
(
libmp3lame
PREFIX
${
CMAKE_CURRENT_SOURCE_DIR
}
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
...
...
@@ -72,11 +80,11 @@ ExternalProject_Add(opusfile
ExternalProject_Add
(
libsox
PREFIX
${
CMAKE_CURRENT_SOURCE_DIR
}
DEPENDS libogg libflac libvorbis opusfile libmp3lame libmad
DEPENDS libogg libflac libvorbis opusfile libmp3lame libmad
amr
DOWNLOAD_DIR
${
ARCHIVE_DIR
}
URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
# OpenMP is by default compiled against GNU OpenMP, which conflicts with the version of OpenMP that PyTorch uses.
# See https://github.com/pytorch/audio/pull/1026
CONFIGURE_COMMAND
${
CMAKE_CURRENT_SOURCE_DIR
}
/build_codec_helper.sh
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/libsox/configure
${
COMMON_ARGS
}
--with-lame --with-flac --with-mad --with-oggvorbis --without-alsa --without-coreaudio --without-png --without-oss --without-sndfile --with-opus --disable-openmp
CONFIGURE_COMMAND
${
CMAKE_CURRENT_SOURCE_DIR
}
/build_codec_helper.sh
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/libsox/configure
${
COMMON_ARGS
}
--with-lame --with-flac --with-mad --with-oggvorbis --without-alsa --without-coreaudio --without-png --without-oss --without-sndfile --with-opus
--with-amrwb --with-amrnb
--disable-openmp
)
torchaudio/backend/sox_io_backend.py
View file @
4406a6bb
...
...
@@ -40,18 +40,19 @@ def load(
This function can handle all the codecs that underlying libsox can handle,
however it is tested on the following formats;
* WAV
* WAV
, AMB
* 32-bit floating-point
* 32-bit signed integer
* 16-bit signed integer
* 8-bit unsigned integer
* 8-bit unsigned integer
(WAV only)
* MP3
* FLAC
* OGG/VORBIS
* OPUS
* SPHERE
* AMR-NB
To load ``MP3``, ``FLAC``, ``OGG/VORBIS``, ``OPUS`` and other codecs ``libsox`` does not
handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
...
...
@@ -119,7 +120,7 @@ def save(
Note:
Supported formats are;
* WAV
* WAV
, AMB
* 32-bit floating-point
* 32-bit signed integer
...
...
@@ -130,6 +131,7 @@ def save(
* FLAC
* OGG/VORBIS
* SPHERE
* AMR-NB
To save ``MP3``, ``FLAC``, ``OGG/VORBIS``, and other codecs ``libsox`` does not
handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
...
...
@@ -160,7 +162,7 @@ def save(
filepath
=
str
(
filepath
)
if
compression
is
None
:
ext
=
str
(
filepath
).
split
(
'.'
)[
-
1
].
lower
()
if
ext
in
[
'wav'
,
'sph'
]:
if
ext
in
[
'wav'
,
'sph'
,
'amb'
,
'amr-nb'
]:
compression
=
0.
elif
ext
==
'mp3'
:
compression
=
-
4.5
...
...
torchaudio/csrc/sox_io.cpp
View file @
4406a6bb
...
...
@@ -85,11 +85,17 @@ void save_audio_file(
const
std
::
string
&
file_name
,
const
c10
::
intrusive_ptr
<
TensorSignal
>&
signal
,
const
double
compression
)
{
const
auto
tensor
=
signal
->
getT
ensor
()
;
auto
tensor
=
signal
->
t
ensor
;
validate_input_tensor
(
tensor
);
const
auto
filetype
=
get_filetype
(
file_name
);
if
(
filetype
==
"amr-nb"
)
{
const
auto
num_channels
=
tensor
.
size
(
signal
->
channels_first
?
0
:
1
);
TORCH_CHECK
(
num_channels
==
1
,
"amr-nb format only supports single channel audio."
);
tensor
=
(
unnormalize_wav
(
tensor
)
/
65536
).
to
(
torch
::
kInt16
);
}
const
auto
signal_info
=
get_signalinfo
(
signal
.
get
(),
filetype
);
const
auto
encoding_info
=
get_encodinginfo
(
filetype
,
tensor
.
dtype
(),
compression
);
...
...
torchaudio/csrc/sox_utils.cpp
View file @
4406a6bb
...
...
@@ -223,7 +223,7 @@ sox_encoding_t get_encoding(
return
SOX_ENCODING_FLAC
;
if
(
filetype
==
"ogg"
||
filetype
==
"vorbis"
)
return
SOX_ENCODING_VORBIS
;
if
(
filetype
==
"wav"
)
{
if
(
filetype
==
"wav"
||
filetype
==
"amb"
)
{
if
(
dtype
==
torch
::
kUInt8
)
return
SOX_ENCODING_UNSIGNED
;
if
(
dtype
==
torch
::
kInt16
)
...
...
@@ -236,7 +236,9 @@ sox_encoding_t get_encoding(
}
if
(
filetype
==
"sph"
)
return
SOX_ENCODING_SIGN2
;
throw
std
::
runtime_error
(
"Unsupported file type."
);
if
(
filetype
==
"amr-nb"
)
return
SOX_ENCODING_AMR_NB
;
throw
std
::
runtime_error
(
"Unsupported file type: "
+
filetype
);
}
unsigned
get_precision
(
...
...
@@ -248,7 +250,7 @@ unsigned get_precision(
return
24
;
if
(
filetype
==
"ogg"
||
filetype
==
"vorbis"
)
return
SOX_UNSPEC
;
if
(
filetype
==
"wav"
)
{
if
(
filetype
==
"wav"
||
filetype
==
"amb"
)
{
if
(
dtype
==
torch
::
kUInt8
)
return
8
;
if
(
dtype
==
torch
::
kInt16
)
...
...
@@ -261,7 +263,13 @@ unsigned get_precision(
}
if
(
filetype
==
"sph"
)
return
32
;
throw
std
::
runtime_error
(
"Unsupported file type."
);
if
(
filetype
==
"amr-nb"
)
{
TORCH_INTERNAL_ASSERT
(
dtype
==
torch
::
kInt16
,
"When saving to AMR-NB format, the input tensor must be int16 type."
);
return
16
;
}
throw
std
::
runtime_error
(
"Unsupported file type: "
+
filetype
);
}
sox_signalinfo_t
get_signalinfo
(
...
...
@@ -287,11 +295,13 @@ sox_encodinginfo_t get_encodinginfo(
return
compression
;
if
(
filetype
==
"ogg"
||
filetype
==
"vorbis"
)
return
compression
;
if
(
filetype
==
"wav"
)
if
(
filetype
==
"wav"
||
filetype
==
"amb"
)
return
0.
;
if
(
filetype
==
"sph"
)
return
0.
;
throw
std
::
runtime_error
(
"Unsupported file type."
);
if
(
filetype
==
"amr-nb"
)
return
0.
;
throw
std
::
runtime_error
(
"Unsupported file type: "
+
filetype
);
}();
return
sox_encodinginfo_t
{
/*encoding=*/
get_encoding
(
filetype
,
dtype
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment