Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
3488f314
"tests/vscode:/vscode.git/clone" did not exist on "8c8a625a828ae5a222b30cb1256af0da7ecfee26"
Unverified
Commit
3488f314
authored
Feb 19, 2021
by
SJ
Committed by
GitHub
Feb 19, 2021
Browse files
Add HTK format support to sox_io's save & info (#1276)
parent
a70931f1
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
43 additions
and
3 deletions
+43
-3
test/torchaudio_unittest/backend/sox_io/info_test.py
test/torchaudio_unittest/backend/sox_io/info_test.py
+17
-1
test/torchaudio_unittest/backend/sox_io/save_test.py
test/torchaudio_unittest/backend/sox_io/save_test.py
+6
-0
torchaudio/backend/sox_io_backend.py
torchaudio/backend/sox_io_backend.py
+5
-1
torchaudio/csrc/sox/types.cpp
torchaudio/csrc/sox/types.cpp
+2
-0
torchaudio/csrc/sox/types.h
torchaudio/csrc/sox/types.h
+1
-0
torchaudio/csrc/sox/utils.cpp
torchaudio/csrc/sox/utils.cpp
+12
-1
No files found.
test/torchaudio_unittest/backend/sox_io/info_test.py
View file @
3488f314
...
...
@@ -205,7 +205,7 @@ class TestInfo(TempDirMixin, PytorchTestCase):
assert
info
.
encoding
==
"ULAW"
def
test_alaw
(
self
):
"""`sox_io_backend.info` can check
u
law file correctly"""
"""`sox_io_backend.info` can check
a
law file correctly"""
duration
=
1
num_channels
=
1
sample_rate
=
8000
...
...
@@ -221,6 +221,22 @@ class TestInfo(TempDirMixin, PytorchTestCase):
assert
info
.
bits_per_sample
==
8
assert
info
.
encoding
==
"ALAW"
def
test_htk
(
self
):
"""`sox_io_backend.info` can check HTK file correctly"""
duration
=
1
num_channels
=
1
sample_rate
=
8000
path
=
self
.
get_temp_path
(
'data.htk'
)
sox_utils
.
gen_audio_file
(
path
,
sample_rate
=
sample_rate
,
num_channels
=
num_channels
,
bit_depth
=
16
,
duration
=
duration
)
info
=
sox_io_backend
.
info
(
path
)
assert
info
.
sample_rate
==
sample_rate
assert
info
.
num_frames
==
sample_rate
*
duration
assert
info
.
num_channels
==
num_channels
assert
info
.
bits_per_sample
==
16
assert
info
.
encoding
==
"PCM_S"
@
skipIfNoExtension
class
TestInfoOpus
(
PytorchTestCase
):
...
...
test/torchaudio_unittest/backend/sox_io/save_test.py
View file @
3488f314
...
...
@@ -237,6 +237,12 @@ class SaveTest(SaveTestBase):
"flac"
,
compression
=
compression_level
,
bits_per_sample
=
bits_per_sample
,
test_mode
=
test_mode
)
@
nested_params
(
[
"path"
,
"fileobj"
,
"bytesio"
],
)
def
test_save_htk
(
self
,
test_mode
):
self
.
assert_save_consistency
(
"htk"
,
test_mode
=
test_mode
,
num_channels
=
1
)
@
nested_params
(
[
"path"
,
"fileobj"
,
"bytesio"
],
[
...
...
torchaudio/backend/sox_io_backend.py
View file @
3488f314
...
...
@@ -195,7 +195,8 @@ def save(
When ``filepath`` argument is file-like object, this argument is required.
Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
``"amb"``, ``"flac"``, ``"sph"`` and ``"gsm"``.
``"amb"``, ``"flac"``, ``"sph"``, ``"gsm"``, and ``"htk"``.
encoding (str, optional): Changes the encoding for the supported formats.
This argument is effective only for supported formats, such as ``"wav"``, ``"amb"``
and ``"sph"``. Valid values are:
...
...
@@ -294,6 +295,9 @@ def save(
``"gsm"``
Lossy Speech Compression, CPU intensive.
``"htk"``
Uses a default single-channel 16-bit PCM format.
Note:
To save into formats that ``libsox`` does not handle natively (such as ``"mp3"``,
``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has
...
...
torchaudio/csrc/sox/types.cpp
View file @
3488f314
...
...
@@ -20,6 +20,8 @@ Format get_format_from_string(const std::string& format) {
return
Format
::
AMB
;
if
(
format
==
"sph"
)
return
Format
::
SPHERE
;
if
(
format
==
"htk"
)
return
Format
::
HTK
;
if
(
format
==
"gsm"
)
return
Format
::
GSM
;
std
::
ostringstream
stream
;
...
...
torchaudio/csrc/sox/types.h
View file @
3488f314
...
...
@@ -16,6 +16,7 @@ enum class Format {
AMB
,
SPHERE
,
GSM
,
HTK
,
};
Format
get_format_from_string
(
const
std
::
string
&
format
);
...
...
torchaudio/csrc/sox/utils.cpp
View file @
3488f314
...
...
@@ -314,6 +314,13 @@ std::tuple<sox_encoding_t, unsigned> get_save_encoding(
throw
std
::
runtime_error
(
"mp3 does not support `bits_per_sample` option."
);
return
std
::
make_tuple
<>
(
SOX_ENCODING_MP3
,
16
);
case
Format
::
HTK
:
if
(
enc
!=
Encoding
::
NOT_PROVIDED
)
throw
std
::
runtime_error
(
"htk does not support `encoding` option."
);
if
(
bps
!=
BitDepth
::
NOT_PROVIDED
)
throw
std
::
runtime_error
(
"htk does not support `bits_per_sample` option."
);
return
std
::
make_tuple
<>
(
SOX_ENCODING_SIGN2
,
16
);
case
Format
::
VORBIS
:
if
(
enc
!=
Encoding
::
NOT_PROVIDED
)
throw
std
::
runtime_error
(
"vorbis does not support `encoding` option."
);
...
...
@@ -417,8 +424,12 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) {
if
(
filetype
==
"amr-nb"
)
{
return
16
;
}
if
(
filetype
==
"gsm"
)
if
(
filetype
==
"gsm"
)
{
return
16
;
}
if
(
filetype
==
"htk"
)
{
return
16
;
}
throw
std
::
runtime_error
(
"Unsupported file type: "
+
filetype
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment