Commit 883f2428 authored by jamarshon's avatar jamarshon Committed by cpuhrsch
Browse files

Use common_utils to check for correct import in torchaudio/kaldi_io.py (#114)

parent af2c2bf7
.. role:: hidden
:class: hidden-section
torchaudio.kaldi_io torchaudio.kaldi_io
====================== ======================
...@@ -9,17 +12,32 @@ This is a light wrapper around ``kaldi_io`` that returns :class:`torch.Tensors`. ...@@ -9,17 +12,32 @@ This is a light wrapper around ``kaldi_io`` that returns :class:`torch.Tensors`.
.. _kaldi_io: https://github.com/vesis84/kaldi-io-for-python .. _kaldi_io: https://github.com/vesis84/kaldi-io-for-python
Vectors Vectors
~~~~~ -------
:hidden:`read_vec_int_ark`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_vec_int_ark
.. autodata:: read_vec_int_ark :hidden:`read_vec_flt_scp`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autodata:: read_vec_flt_scp .. autofunction:: read_vec_flt_scp
.. autodata:: read_vec_flt_ark :hidden:`read_vec_flt_ark`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_vec_flt_ark
Matrices Matrices
~~~~~ --------
:hidden:`read_mat_scp`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: read_mat_scp
.. autodata:: read_mat_scp :hidden:`read_mat_ark`
~~~~~~~~~~~~~~~~~~~~~~
.. autodata:: read_mat_ark .. autofunction:: read_mat_ark
...@@ -28,7 +28,8 @@ def _check_module_exists(name): ...@@ -28,7 +28,8 @@ def _check_module_exists(name):
spec = importlib.util.find_spec(name) spec = importlib.util.find_spec(name)
return spec is not None return spec is not None
IMPORT_NUMPY = _check_module_exists('numpy')
IMPORT_KALDI_IO = _check_module_exists('kaldi_io')
IMPORT_SCIPY = _check_module_exists('scipy') IMPORT_SCIPY = _check_module_exists('scipy')
# On Py2, importing librosa 0.6.1 triggers a TypeError (if using newest joblib) # On Py2, importing librosa 0.6.1 triggers a TypeError (if using newest joblib)
......
# To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python) # To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python)
# needs to be installed. This is a light wrapper around kaldi_io that returns # needs to be installed. This is a light wrapper around kaldi_io that returns
# torch.Tensors. # torch.Tensors.
import numpy as np
import torch import torch
from torchaudio.common_utils import IMPORT_KALDI_IO, IMPORT_NUMPY
if IMPORT_NUMPY:
import numpy as np
if IMPORT_KALDI_IO:
import kaldi_io
__all__ = [ __all__ = [
...@@ -14,86 +20,111 @@ __all__ = [ ...@@ -14,86 +20,111 @@ __all__ = [
] ]
def _default_not_imported_method(): def _convert_method_output_to_tensor(file_or_fd, fn, convert_contiguous=False):
raise ImportError('Could not import kaldi_io. Did you install it?') r""" Takes a method and invokes it. The output is converted to a tensor.
Arguments:
file_or_fd (string/File Descriptor): file name or file descriptor.
fn (Function): function that has the signature (file name/descriptor) -> generator(string, ndarray)
and converts it to (file name/descriptor) -> generator(string, Tensor).
convert_contiguous (bool): determines whether the array should be converted into a
contiguous layout.
def _wrap_method(fn, convert_contiguous=False): Returns:
# type: (Function, bool) -> Function generator[key (string), vec/mat (Tensor)]
""" Takes a method with the signature (file name/descriptor) -> generator(string, ndarray)
and converts it to (file name/descriptor) -> generator(string, Tensor).
convert_contiguous determines whether the array should be converted into a
contiguous layout.
""" """
def _wrapped_fn(file_or_fd): if not IMPORT_KALDI_IO:
for key, np_arr in fn(file_or_fd): raise ImportError('Could not import kaldi_io. Did you install it?')
if convert_contiguous:
np_arr = np.ascontiguousarray(np_arr) for key, np_arr in fn(file_or_fd):
yield key, torch.from_numpy(np_arr) if convert_contiguous:
return _wrapped_fn np_arr = np.ascontiguousarray(np_arr)
yield key, torch.from_numpy(np_arr)
#: Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
#: def read_vec_int_ark(file_or_fd):
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor. r"""Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
#:
#: Example, read ark to a 'dictionary': Arguments:
#: file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_int_ark(file_or_fd)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) } Returns:
read_vec_int_ark = _default_not_imported_method generator[key (string), vec (Tensor)]
#: Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp. Example::
#:
#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor. >>> # read ark to a 'dictionary'
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
#: Example, read scp to a 'dictionary': """
#: # Requires convert_contiguous to be True because elements from int32 vector are
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_scp(file_or_fd) # stored in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) } # in from_numpy as it expects strides to be a multiple of 4 (int32).
read_vec_flt_scp = _default_not_imported_method return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_int_ark, convert_contiguous=True)
#: Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
#:
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor. def read_vec_flt_scp(file_or_fd):
#: r"""Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp.
#: Example, read ark to a 'dictionary':
#: Arguments:
#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_ark(file_or_fd) file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
read_vec_flt_ark = _default_not_imported_method Returns:
generator[key (string), vec (Tensor)]
#: Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.
#: Example::
#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor.
#: >>> # read scp to a 'dictionary'
#: Example, read scp to a 'dictionary': >>> # d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
#: """
#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_scp(file_or_fd) return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_scp)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
read_mat_scp = _default_not_imported_method
#: Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
#:
#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
#:
#: Example, read ark to a 'dictionary':
#:
#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_ark(file_or_fd)
#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
read_mat_ark = _default_not_imported_method
try:
import kaldi_io
# Overwrite methods
# Elements from int32 vector are stored in tuples: (sizeof(int32), value)
# so strides are (5,) instead of (4,) which will throw an error in from_numpy
# as it expects strides to be a multiple of 4 (int32).
read_vec_int_ark = _wrap_method(kaldi_io.read_vec_int_ark, convert_contiguous=True)
read_vec_flt_scp = _wrap_method(kaldi_io.read_vec_flt_scp) def read_vec_flt_ark(file_or_fd):
read_vec_flt_ark = _wrap_method(kaldi_io.read_vec_flt_ark) r"""Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
read_mat_scp = _wrap_method(kaldi_io.read_mat_scp) Arguments:
read_mat_ark = _wrap_method(kaldi_io.read_mat_ark) file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
except ImportError:
pass Returns:
generator[key (string), vec (Tensor)]
Example::
>>> # read ark to a 'dictionary'
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
"""
return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_ark)
def read_mat_scp(file_or_fd):
    r"""Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.

    Arguments:
        file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.

    Returns:
        generator[key (string), mat (Tensor)]

    Raises:
        ImportError: if the optional ``kaldi_io`` dependency is not installed.

    Example::

        >>> # read scp to a 'dictionary'
        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
    """
    # ``kaldi_io`` is only bound at module level when IMPORT_KALDI_IO is true, so the
    # flag must be checked before the name is touched; otherwise evaluating
    # ``kaldi_io.read_mat_scp`` below fails with a NameError instead of the intended
    # ImportError.
    if not IMPORT_KALDI_IO:
        raise ImportError('Could not import kaldi_io. Did you install it?')
    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_scp)
def read_mat_ark(file_or_fd):
    r"""Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.

    Arguments:
        file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.

    Returns:
        generator[key (string), mat (Tensor)]

    Raises:
        ImportError: if the optional ``kaldi_io`` dependency is not installed.

    Example::

        >>> # read ark to a 'dictionary'
        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
    """
    # ``kaldi_io`` is only bound at module level when IMPORT_KALDI_IO is true, so the
    # flag must be checked before the name is touched; otherwise evaluating
    # ``kaldi_io.read_mat_ark`` below fails with a NameError instead of the intended
    # ImportError.
    if not IMPORT_KALDI_IO:
        raise ImportError('Could not import kaldi_io. Did you install it?')
    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_ark)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment