Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
883f2428
Commit
883f2428
authored
May 22, 2019
by
jamarshon
Committed by
cpuhrsch
May 22, 2019
Browse files
Use common_utils to check for correct import in torchaudio/kaldi_io.py (#114)
parent
af2c2bf7
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
136 additions
and
86 deletions
+136
-86
docs/source/kaldi_io.rst
docs/source/kaldi_io.rst
+25
-7
torchaudio/common_utils.py
torchaudio/common_utils.py
+2
-1
torchaudio/kaldi_io.py
torchaudio/kaldi_io.py
+109
-78
No files found.
docs/source/kaldi_io.rst
View file @
883f2428
.. role:: hidden
:class: hidden-section
torchaudio.kaldi_io
torchaudio.kaldi_io
======================
======================
...
@@ -9,17 +12,32 @@ This is a light wrapper around ``kaldi_io`` that returns :class:`torch.Tensors`.
...
@@ -9,17 +12,32 @@ This is a light wrapper around ``kaldi_io`` that returns :class:`torch.Tensors`.
.. _kaldi_io: https://github.com/vesis84/kaldi-io-for-python
.. _kaldi_io: https://github.com/vesis84/kaldi-io-for-python
Vectors
Vectors
~~~~~
-------
:hidden:`read_vec_int_ark`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_vec_int_ark
.. autodata:: read_vec_int_ark
:hidden:`read_vec_flt_scp`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. auto
data
:: read_vec_flt_scp
.. auto
function
:: read_vec_flt_scp
.. autodata:: read_vec_flt_ark
:hidden:`read_vec_flt_ark`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_vec_flt_ark
Matrices
Matrices
~~~~~
--------
:hidden:`read_mat_scp`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_mat_scp
.. autodata:: read_mat_scp
:hidden:`read_mat_ark`
~~~~~~~~~~~~~~~~~~~~~~
.. auto
data
:: read_mat_ark
.. auto
function
:: read_mat_ark
torchaudio/common_utils.py
View file @
883f2428
...
@@ -28,7 +28,8 @@ def _check_module_exists(name):
...
@@ -28,7 +28,8 @@ def _check_module_exists(name):
spec
=
importlib
.
util
.
find_spec
(
name
)
spec
=
importlib
.
util
.
find_spec
(
name
)
return
spec
is
not
None
return
spec
is
not
None
IMPORT_NUMPY
=
_check_module_exists
(
'numpy'
)
IMPORT_KALDI_IO
=
_check_module_exists
(
'kaldi_io'
)
IMPORT_SCIPY
=
_check_module_exists
(
'scipy'
)
IMPORT_SCIPY
=
_check_module_exists
(
'scipy'
)
# On Py2, importing librosa 0.6.1 triggers a TypeError (if using newest joblib)
# On Py2, importing librosa 0.6.1 triggers a TypeError (if using newest joblib)
...
...
torchaudio/kaldi_io.py
View file @
883f2428
# To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python)
# To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python)
# needs to be installed. This is a light wrapper around kaldi_io that returns
# needs to be installed. This is a light wrapper around kaldi_io that returns
# torch.Tensors.
# torch.Tensors.
import
numpy
as
np
import
torch
import
torch
from
torchaudio.common_utils
import
IMPORT_KALDI_IO
,
IMPORT_NUMPY
if
IMPORT_NUMPY
:
import
numpy
as
np
if
IMPORT_KALDI_IO
:
import
kaldi_io
__all__
=
[
__all__
=
[
...
@@ -14,86 +20,111 @@ __all__ = [
...
@@ -14,86 +20,111 @@ __all__ = [
]
]
def
_
default_not_imported_method
(
):
def
_
convert_method_output_to_tensor
(
file_or_fd
,
fn
,
convert_contiguous
=
False
):
r
aise
ImportError
(
'Could not import kaldi_io. Did you install it?'
)
r
""" Takes a method and invokes it. The output is converted to a tensor.
Arguments:
file_or_fd (string/File Descriptor): file name or file descriptor.
fn (Function): function that has the signature (file name/descriptor) -> generator(string, ndarray)
and converts it to (file name/descriptor) -> generator(string, Tensor).
convert_contiguous (bool): determines whether the array should be converted into a
contiguous layout.
def
_wrap_method
(
fn
,
convert_contiguous
=
False
):
Returns:
# type: (Function, bool) -> Function
generator[key (string), vec/mat (Tensor)]
""" Takes a method with the signature (file name/descriptor) -> generator(string, ndarray)
and converts it to (file name/descriptor) -> generator(string, Tensor).
convert_contiguous determines whether the array should be converted into a
contiguous layout.
"""
"""
def
_wrapped_fn
(
file_or_fd
):
if
not
IMPORT_KALDI_IO
:
for
key
,
np_arr
in
fn
(
file_or_fd
):
raise
ImportError
(
'Could not import kaldi_io. Did you install it?'
)
if
convert_contiguous
:
np_arr
=
np
.
ascontiguousarray
(
np_arr
)
for
key
,
np_arr
in
fn
(
file_or_fd
):
yield
key
,
torch
.
from_numpy
(
np_arr
)
if
convert_contiguous
:
return
_wrapped_fn
np_arr
=
np
.
ascontiguousarray
(
np_arr
)
yield
key
,
torch
.
from_numpy
(
np_arr
)
#: Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
#:
def
read_vec_int_ark
(
file_or_fd
):
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
r
"""Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
#:
#: Example, read ark to a 'dictionary':
Arguments:
#:
file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_int_ark(file_or_fd)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
Returns:
read_vec_int_ark
=
_default_not_imported_method
generator[key (string), vec (Tensor)]
#: Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp.
Example::
#:
#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor.
>>> # read ark to a 'dictionary'
#:
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
#: Example, read scp to a 'dictionary':
"""
#:
# Requires convert_contiguous to be True because elements from int32 vector are
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_scp(file_or_fd)
# stored in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
# in from_numpy as it expects strides to be a multiple of 4 (int32).
read_vec_flt_scp
=
_default_not_imported_method
return
_convert_method_output_to_tensor
(
file_or_fd
,
kaldi_io
.
read_vec_int_ark
,
convert_contiguous
=
True
)
#: Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
#:
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
def
read_vec_flt_scp
(
file_or_fd
):
#:
r
"""Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp.
#: Example, read ark to a 'dictionary':
#:
Arguments:
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_ark(file_or_fd)
file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
read_vec_flt_ark
=
_default_not_imported_method
Returns:
generator[key (string), vec (Tensor)]
#: Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.
#:
Example::
#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor.
#:
>>> # read scp to a 'dictionary'
#: Example, read scp to a 'dictionary':
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
#:
"""
#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_scp(file_or_fd)
return
_convert_method_output_to_tensor
(
file_or_fd
,
kaldi_io
.
read_vec_flt_scp
)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
read_mat_scp
=
_default_not_imported_method
#: Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
#:
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
#:
#: Example, read ark to a 'dictionary':
#:
#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_ark(file_or_fd)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
read_mat_ark
=
_default_not_imported_method
try
:
import
kaldi_io
# Overwrite methods
# Elements from int32 vector are stored in tuples: (sizeof(int32), value)
# so strides are (5,) instead of (4,) which will throw an error in from_numpy
# as it expects strides to be a multiple of 4 (int32).
read_vec_int_ark
=
_wrap_method
(
kaldi_io
.
read_vec_int_ark
,
convert_contiguous
=
True
)
read_vec_flt_
scp
=
_wrap_method
(
kaldi_io
.
read_vec_flt_scp
)
def
read_vec_flt_
ark
(
file_or_fd
):
r
ead_vec_flt_ark
=
_wrap_method
(
kaldi_io
.
read_vec_flt_ark
)
r
"""Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
read_mat_scp
=
_wrap_method
(
kaldi_io
.
read_mat_scp
)
Arguments:
read_mat_ark
=
_wrap_method
(
kaldi_io
.
read_mat_ark
)
file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
except
ImportError
:
pass
Returns:
generator[key (string), vec (Tensor)]
Example::
>>> # read ark to a 'dictionary'
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
"""
return
_convert_method_output_to_tensor
(
file_or_fd
,
kaldi_io
.
read_vec_flt_ark
)
def
read_mat_scp
(
file_or_fd
):
r
"""Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.
Arguments:
file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.
Returns:
generator[key (string), mat (Tensor)]
Example::
>>> # read scp to a 'dictionary'
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
"""
return
_convert_method_output_to_tensor
(
file_or_fd
,
kaldi_io
.
read_mat_scp
)
def
read_mat_ark
(
file_or_fd
):
r
"""Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
Arguments:
file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
Returns:
generator[key (string), mat (Tensor)]
Example::
>>> # read ark to a 'dictionary'
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
"""
return
_convert_method_output_to_tensor
(
file_or_fd
,
kaldi_io
.
read_mat_ark
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment