Use common_utils to check for correct import in torchaudio/kaldi_io.py (#114)

883f2428 · jamarshon · cpuhrsch · af2c2bf7 · 883f2428 · 883f2428
Commit 883f2428 authored May 22, 2019 by jamarshon Committed by cpuhrsch May 22, 2019
Showing with 136 additions and 86 deletions

docs/source/kaldi_io.rst docs/source/kaldi_io.rst +25 -7

torchaudio/common_utils.py torchaudio/common_utils.py +2 -1

torchaudio/kaldi_io.py torchaudio/kaldi_io.py +109 -78

No files found.
--- a/docs/source/kaldi_io.rst
+++ b/docs/source/kaldi_io.rst
+.. role:: hidden
+    :class: hidden-section
 torchaudio.kaldi_io
 ======================
@@ -9,17 +12,32 @@ This is a light wrapper around ``kaldi_io`` that returns :class:`torch.Tensors`.
 .. _kaldi_io: https://github.com/vesis84/kaldi-io-for-python
 Vectors
-~~~~~
+-------
+:hidden:`read_vec_int_ark`
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: read_vec_int_ark
-.. autodata:: read_vec_int_ark
+:hidden:`read_vec_flt_scp`
+~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. autodata:: read_vec_flt_scp
+.. autofunction:: read_vec_flt_scp
-.. autodata:: read_vec_flt_ark
+:hidden:`read_vec_flt_ark`
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: read_vec_flt_ark
 Matrices
-~~~~~
+--------
+:hidden:`read_mat_scp`
+~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: read_mat_scp
-.. autodata:: read_mat_scp
+:hidden:`read_mat_ark`
+~~~~~~~~~~~~~~~~~~~~~~
-.. autodata:: read_mat_ark
+.. autofunction:: read_mat_ark
--- a/torchaudio/common_utils.py
+++ b/torchaudio/common_utils.py
@@ -28,7 +28,8 @@ def _check_module_exists(name):
        spec = importlib.util.find_spec(name)
        return spec is not None
+IMPORT_NUMPY = _check_module_exists('numpy')
+IMPORT_KALDI_IO = _check_module_exists('kaldi_io')
 IMPORT_SCIPY = _check_module_exists('scipy')
 # On Py2, importing librosa 0.6.1 triggers a TypeError (if using newest joblib)

--- a/torchaudio/kaldi_io.py
+++ b/torchaudio/kaldi_io.py
 # To use this file, the dependency (https://github.com/vesis84/kaldi-io-for-python)
 # needs to be installed. This is a light wrapper around kaldi_io that returns
 # torch.Tensors.
-import numpy as np
 import torch
+from torchaudio.common_utils import IMPORT_KALDI_IO, IMPORT_NUMPY
+if IMPORT_NUMPY:
+    import numpy as np
+if IMPORT_KALDI_IO:
+    import kaldi_io
 __all__ = [
@@ -14,86 +20,111 @@ __all__ = [
 ]
-def _default_not_imported_method():
+def _convert_method_output_to_tensor(file_or_fd, fn, convert_contiguous=False):
-    raise ImportError('Could not import kaldi_io. Did you install it?')
+    r""" Takes a method and invokes it. The output is converted to a tensor.
+    Arguments:
+        file_or_fd (string/File Descriptor): file name or file descriptor.
+        fn (Function): function that has the signature (file name/descriptor) -> generator(string, ndarray);
+            its output is converted to generator(string, Tensor).
+        convert_contiguous (bool): determines whether the array should be converted into a
+            contiguous layout.
-def _wrap_method(fn, convert_contiguous=False):
+    Returns:
-    # type: (Function, bool) -> Function
+        generator[key (string), vec/mat (Tensor)]
-    """ Takes a method with the signature (file name/descriptor) -> generator(string, ndarray)
-    and converts it to (file name/descriptor) -> generator(string, Tensor).
-    convert_contiguous determines whether the array should be converted into a
-    contiguous layout.
    """
-    def _wrapped_fn(file_or_fd):
+    if not IMPORT_KALDI_IO:
-        for key, np_arr in fn(file_or_fd):
+        raise ImportError('Could not import kaldi_io. Did you install it?')
-            if convert_contiguous:
-                np_arr = np.ascontiguousarray(np_arr)
+    for key, np_arr in fn(file_or_fd):
-            yield key, torch.from_numpy(np_arr)
+        if convert_contiguous:
-    return _wrapped_fn
+            np_arr = np.ascontiguousarray(np_arr)
+        yield key, torch.from_numpy(np_arr)
-#: Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
-#:
+def read_vec_int_ark(file_or_fd):
-#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
+    r"""Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
-#:
-#: Example, read ark to a 'dictionary':
+    Arguments:
-#:
+        file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
-#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_int_ark(file_or_fd)
-#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
+    Returns:
-read_vec_int_ark = _default_not_imported_method
+        generator[key (string), vec (Tensor)]
-#: Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp.
+    Example::
-#:
-#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor.
+        >>> # read ark to a 'dictionary'
-#:
+        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
-#: Example, read scp to a 'dictionary':
+    """
-#:
+    # Requires convert_contiguous to be True because elements from int32 vector are
-#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_scp(file_or_fd)
+    # stored in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
-#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
+    # in from_numpy as it expects strides to be a multiple of 4 (int32).
-read_vec_flt_scp = _default_not_imported_method
+    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_int_ark, convert_contiguous=True)
-#: Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
-#:
-#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
+def read_vec_flt_scp(file_or_fd):
-#:
+    r"""Create generator of (key,vector<float32/float64>) tuples, read according to kaldi scp.
-#: Example, read ark to a 'dictionary':
-#:
+    Arguments:
-#: >>> # generator(key,vec) = torchaudio.kaldi_io.read_vec_flt_ark(file_or_fd)
+        file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.
-#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
-read_vec_flt_ark = _default_not_imported_method
+    Returns:
+        generator[key (string), vec (Tensor)]
-#: Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.
-#:
+    Example::
-#: file_or_fd : scp, gzipped scp, pipe or opened file descriptor.
-#:
+        >>> # read scp to a 'dictionary'
-#: Example, read scp to a 'dictionary':
+        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_scp(file) }
-#:
+    """
-#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_scp(file_or_fd)
+    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_scp)
-#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
-read_mat_scp = _default_not_imported_method
-#: Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
-#:
-#: file_or_fd : ark, gzipped ark, pipe or opened file descriptor.
-#:
-#: Example, read ark to a 'dictionary':
-#:
-#: >>> # generator(key,mat) = torchaudio.kaldi_io.read_mat_ark(file_or_fd)
-#: >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
-read_mat_ark = _default_not_imported_method
-try:
-    import kaldi_io
-    # Overwrite methods
-    # Elements from int32 vector are stored in tuples: (sizeof(int32), value)
-    # so strides are (5,) instead of (4,) which will throw an error in from_numpy
-    # as it expects strides to be a multiple of 4 (int32).
-    read_vec_int_ark = _wrap_method(kaldi_io.read_vec_int_ark, convert_contiguous=True)
-    read_vec_flt_scp = _wrap_method(kaldi_io.read_vec_flt_scp)
+def read_vec_flt_ark(file_or_fd):
-    read_vec_flt_ark = _wrap_method(kaldi_io.read_vec_flt_ark)
+    r"""Create generator of (key,vector<float32/float64>) tuples, which reads from the ark file/stream.
-    read_mat_scp = _wrap_method(kaldi_io.read_mat_scp)
+    Arguments:
-    read_mat_ark = _wrap_method(kaldi_io.read_mat_ark)
+        file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
-except ImportError:
-    pass
+    Returns:
+        generator[key (string), vec (Tensor)]
+    Example::
+        >>> # read ark to a 'dictionary'
+        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_flt_ark(file) }
+    """
+    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_flt_ark)
+def read_mat_scp(file_or_fd):
+    r"""Create generator of (key,matrix<float32/float64>) tuples, read according to kaldi scp.
+    Arguments:
+        file_or_fd (string/File Descriptor): scp, gzipped scp, pipe or opened file descriptor.
+    Returns:
+        generator[key (string), mat (Tensor)]
+    Example::
+        >>> # read scp to a 'dictionary'
+        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_scp(file) }
+    """
+    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_scp)
+def read_mat_ark(file_or_fd):
+    r"""Create generator of (key,matrix<float32/float64>) tuples, which reads from the ark file/stream.
+    Arguments:
+        file_or_fd (string/File Descriptor): ark, gzipped ark, pipe or opened file descriptor.
+    Returns:
+        generator[key (string), mat (Tensor)]
+    Example::
+        >>> # read ark to a 'dictionary'
+        >>> d = { u:d for u,d in torchaudio.kaldi_io.read_mat_ark(file) }
+    """
+    return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_mat_ark)