Move all SoX I/O functions to _sox_backend for better modularity (#695)

4d52106f · moto · GitHub · e5eb4857 · 4d52106f · 4d52106f
Unverified Commit 4d52106f authored Jun 05, 2020 by moto Committed by GitHub Jun 05, 2020
Show whitespace changes
Inline Side-by-side

Showing with 196 additions and 191 deletions

torchaudio/__init__.py torchaudio/__init__.py +8 -187

torchaudio/_sox_backend.py torchaudio/_sox_backend.py +188 -4

No files found.
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
-import os.path
 from pathlib import Path
 from typing import Any, Callable, Optional, Tuple, Union

-import torch
 from torch import Tensor
 from torchaudio import (
    compliance,
@@ -16,6 +14,14 @@ from torchaudio._backend import (
    get_audio_backend,
    set_audio_backend,
 )
+from torchaudio._sox_backend import (
+    save_encinfo,
+    sox_signalinfo_t,
+    sox_encodinginfo_t,
+    get_sox_option_t,
+    get_sox_encoding_t,
+    get_sox_bool,
+)
 from torchaudio._soundfile_backend import SignalInfo, EncodingInfo
 from torchaudio._internal import (
    module_utils as _mod_utils,
@@ -130,77 +136,6 @@ def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, chan
    )


-@_mod_utils.requires_module('torchaudio._torchaudio')
-def save_encinfo(filepath: str,
-                 src: Tensor,
-                 channels_first: bool = True,
-                 signalinfo: Optional[SignalInfo] = None,
-                 encodinginfo: Optional[EncodingInfo] = None,
-                 filetype: Optional[str] = None) -> None:
-    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.
-
-    Args:
-        filepath (str): Path to audio file
-        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
-            the number of audio frames, C is the number of channels
-        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
-        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
-            audio type cannot be automatically determined (Default: ``None``).
-        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
-            audio type cannot be automatically determined (Default: ``None``).
-        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
-            automatically. (Default: ``None``)
-
-    Example
-        >>> data, sample_rate = torchaudio.load('foo.mp3')
-        >>> torchaudio.save('foo.wav', data, sample_rate)
-
-    """
-    ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
-
-    # check if save directory exists
-    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
-    if not os.path.isdir(abs_dirpath):
-        raise OSError("Directory does not exist: {}".format(abs_dirpath))
-    # check that src is a CPU tensor
-    _misc_ops.check_input(src)
-    # Check/Fix shape of source data
-    if src.dim() == 1:
-        # 1d tensors as assumed to be mono signals
-        src.unsqueeze_(ch_idx)
-    elif src.dim() > 2 or src.size(ch_idx) > 16:
-        # assumes num_channels < 16
-        raise ValueError(
-            "Expected format where C < 16, but found {}".format(src.size()))
-    # sox stores the sample rate as a float, though practically sample rates are almost always integers
-    # convert integers to floats
-    if signalinfo:
-        if signalinfo.rate and not isinstance(signalinfo.rate, float):
-            if float(signalinfo.rate) == signalinfo.rate:
-                signalinfo.rate = float(signalinfo.rate)
-            else:
-                raise TypeError('Sample rate should be a float or int')
-        # check if the bit precision (i.e. bits per sample) is an integer
-        if signalinfo.precision and not isinstance(signalinfo.precision, int):
-            if int(signalinfo.precision) == signalinfo.precision:
-                signalinfo.precision = int(signalinfo.precision)
-            else:
-                raise TypeError('Bit precision should be an integer')
-    # programs such as librosa normalize the signal, unnormalize if detected
-    if src.min() >= -1.0 and src.max() <= 1.0:
-        src = src * (1 << 31)
-        src = src.long()
-    # set filetype and allow for files with no extensions
-    extension = os.path.splitext(filepath)[1]
-    filetype = extension[1:] if len(extension) > 0 else filetype
-    # transpose from C x L -> L x C
-    if channels_first:
-        src = src.transpose(1, 0)
-    # save data to file
-    src = src.contiguous()
-    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
-
-
 def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
    r"""Gets metadata from an audio file without loading the signal.

@@ -216,117 +151,3 @@ def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
         >>> rate, channels, encoding = si.rate, si.channels, ei.encoding
    """
    return _get_audio_backend_module().info(filepath)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-def sox_signalinfo_t() -> SignalInfo:
-    r"""Create a sox_signalinfo_t object. This object can be used to set the sample
-    rate, number of channels, length, bit precision and headroom multiplier
-    primarily for effects
-
-    Returns: sox_signalinfo_t(object)
-        - rate (float), sample rate as a float, practically will likely be an integer float
-        - channel (int), number of audio channels
-        - precision (int), bit precision
-        - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown
-        - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier
-
-    Example
-        >>> si = torchaudio.sox_signalinfo_t()
-        >>> si.channels = 1
-        >>> si.rate = 16000.
-        >>> si.precision = 16
-        >>> si.length = 0
-    """
-    return _torchaudio.sox_signalinfo_t()
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-def sox_encodinginfo_t() -> EncodingInfo:
-    r"""Create a sox_encodinginfo_t object.  This object can be used to set the encoding
-    type, bit precision, compression factor, reverse bytes, reverse nibbles,
-    reverse bits and endianness.  This can be used in an effects chain to encode the
-    final output or to save a file with a specific encoding.  For example, one could
-    use the sox ulaw encoding to do 8-bit ulaw encoding.  Note in a tensor output
-    the result will be a 32-bit number, but number of unique values will be determined by
-    the bit precision.
-
-    Returns: sox_encodinginfo_t(object)
-        - encoding (sox_encoding_t), output encoding
-        - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t
-        - compression (float), compression for lossy formats, 0.0 for default compression
-        - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default
-        - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default
-        - reverse_bits (sox_option_t), reverse bytes, use sox_option_default
-        - opposite_endian (sox_bool), change endianness, use sox_false
-
-    Example
-        >>> ei = torchaudio.sox_encodinginfo_t()
-        >>> ei.encoding = torchaudio.get_sox_encoding_t(1)
-        >>> ei.bits_per_sample = 16
-        >>> ei.compression = 0
-        >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2)
-        >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2)
-        >>> ei.reverse_bits = torchaudio.get_sox_option_t(2)
-        >>> ei.opposite_endian = torchaudio.get_sox_bool(0)
-
-    """
-    ei = _torchaudio.sox_encodinginfo_t()
-    sdo = get_sox_option_t(2)  # sox_default_option
-    ei.reverse_bytes = sdo
-    ei.reverse_nibbles = sdo
-    ei.reverse_bits = sdo
-    return ei
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-def get_sox_encoding_t(i: int = None) -> EncodingInfo:
-    r"""Get enum of sox_encoding_t for sox encodings.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified. (Default: ``None``)
-
-    Returns:
-        sox_encoding_t: A sox_encoding_t type for output encoding
-    """
-    if i is None:
-        # one can see all possible values using the .__members__ attribute
-        return _torchaudio.sox_encoding_t
-    else:
-        return _torchaudio.sox_encoding_t(i)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-def get_sox_option_t(i: int = 2) -> Any:
-    r"""Get enum of sox_option_t for sox encodinginfo options.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified.
-            (Default: ``sox_option_default`` or ``2``)
-    Returns:
-        sox_option_t: A sox_option_t type
-    """
-    if i is None:
-        return _torchaudio.sox_option_t
-    else:
-        return _torchaudio.sox_option_t(i)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-def get_sox_bool(i: int = 0) -> Any:
-    r"""Get enum of sox_bool for sox encodinginfo options.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified. (Default:
-            ``sox_false`` or ``0``)
-
-    Returns:
-        sox_bool: A sox_bool type
-    """
-    if i is None:
-        return _torchaudio.sox_bool
-    else:
-        return _torchaudio.sox_bool(i)
--- a/torchaudio/_sox_backend.py
+++ b/torchaudio/_sox_backend.py
 import os.path
-from typing import Optional, Tuple
+from typing import Any, Optional, Tuple

 import torch
 from torch import Tensor

-import torchaudio
 from torchaudio._internal import (
    module_utils as _mod_utils,
    misc_ops as _misc_ops,
@@ -65,16 +64,201 @@ def load(filepath: str,
 def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
    r"""See torchaudio.save"""

-    si = torchaudio.sox_signalinfo_t()
+    si = sox_signalinfo_t()
    ch_idx = 0 if channels_first else 1
    si.rate = sample_rate
    si.channels = 1 if src.dim() == 1 else src.size(ch_idx)
    si.length = src.numel()
    si.precision = precision
-    return torchaudio.save_encinfo(filepath, src, channels_first, si)
+    return save_encinfo(filepath, src, channels_first, si)


 @_mod_utils.requires_module('torchaudio._torchaudio')
 def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
    r"""See torchaudio.info"""
    return _torchaudio.get_info(filepath)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def save_encinfo(filepath: str,
+                 src: Tensor,
+                 channels_first: bool = True,
+                 signalinfo: Optional[SignalInfo] = None,
+                 encodinginfo: Optional[EncodingInfo] = None,
+                 filetype: Optional[str] = None) -> None:
+    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.
+
+    Args:
+        filepath (str): Path to audio file
+        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
+            the number of audio frames, C is the number of channels
+        channels_first (bool, optional): Whether ``src`` is channels first (`[C x L]`) or length first (`[L x C]`). (Default: ``True``)
+        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
+            audio type cannot be automatically determined (Default: ``None``).
+        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
+            audio type cannot be automatically determined (Default: ``None``).
+        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
+            automatically. (Default: ``None``)
+
+    Example
+        >>> data, sample_rate = torchaudio.load('foo.mp3')
+        >>> torchaudio.save('foo.wav', data, sample_rate)
+
+    """
+    ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
+
+    # check if save directory exists
+    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
+    if not os.path.isdir(abs_dirpath):
+        raise OSError("Directory does not exist: {}".format(abs_dirpath))
+    # check that src is a CPU tensor
+    _misc_ops.check_input(src)
+    # Check/Fix shape of source data
+    if src.dim() == 1:
+        # 1d tensors are assumed to be mono signals
+        src.unsqueeze_(ch_idx)
+    elif src.dim() > 2 or src.size(ch_idx) > 16:
+        # assumes num_channels <= 16 (the check above rejects only C > 16)
+        raise ValueError(
+            "Expected format where C < 16, but found {}".format(src.size()))
+    # sox stores the sample rate as a float, though practically sample rates are almost always integers
+    # convert integers to floats
+    if signalinfo:
+        if signalinfo.rate and not isinstance(signalinfo.rate, float):
+            if float(signalinfo.rate) == signalinfo.rate:
+                signalinfo.rate = float(signalinfo.rate)
+            else:
+                raise TypeError('Sample rate should be a float or int')
+        # check if the bit precision (i.e. bits per sample) is an integer
+        if signalinfo.precision and not isinstance(signalinfo.precision, int):
+            if int(signalinfo.precision) == signalinfo.precision:
+                signalinfo.precision = int(signalinfo.precision)
+            else:
+                raise TypeError('Bit precision should be an integer')
+    # programs such as librosa normalize the signal, unnormalize if detected
+    if src.min() >= -1.0 and src.max() <= 1.0:
+        src = src * (1 << 31)
+        src = src.long()
+    # set filetype and allow for files with no extensions
+    extension = os.path.splitext(filepath)[1]
+    filetype = extension[1:] if len(extension) > 0 else filetype
+    # transpose from C x L -> L x C
+    if channels_first:
+        src = src.transpose(1, 0)
+    # save data to file
+    src = src.contiguous()
+    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def sox_signalinfo_t() -> SignalInfo:
+    r"""Create a sox_signalinfo_t object. This object can be used to set the sample
+    rate, number of channels, length, bit precision and headroom multiplier
+    primarily for effects
+
+    Returns: sox_signalinfo_t(object)
+        - rate (float), sample rate as a float, practically will likely be an integer float
+        - channels (int), number of audio channels
+        - precision (int), bit precision
+        - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown
+        - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier
+
+    Example
+        >>> si = torchaudio.sox_signalinfo_t()
+        >>> si.channels = 1
+        >>> si.rate = 16000.
+        >>> si.precision = 16
+        >>> si.length = 0
+    """
+    return _torchaudio.sox_signalinfo_t()
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def sox_encodinginfo_t() -> EncodingInfo:
+    r"""Create a sox_encodinginfo_t object.  This object can be used to set the encoding
+    type, bit precision, compression factor, reverse bytes, reverse nibbles,
+    reverse bits and endianness.  This can be used in an effects chain to encode the
+    final output or to save a file with a specific encoding.  For example, one could
+    use the sox ulaw encoding to do 8-bit ulaw encoding.  Note in a tensor output
+    the result will be a 32-bit number, but number of unique values will be determined by
+    the bit precision.
+
+    Returns: sox_encodinginfo_t(object)
+        - encoding (sox_encoding_t), output encoding
+        - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t
+        - compression (float), compression for lossy formats, 0.0 for default compression
+        - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default
+        - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default
+        - reverse_bits (sox_option_t), reverse bits, use sox_option_default
+        - opposite_endian (sox_bool), change endianness, use sox_false
+
+    Example
+        >>> ei = torchaudio.sox_encodinginfo_t()
+        >>> ei.encoding = torchaudio.get_sox_encoding_t(1)
+        >>> ei.bits_per_sample = 16
+        >>> ei.compression = 0
+        >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2)
+        >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2)
+        >>> ei.reverse_bits = torchaudio.get_sox_option_t(2)
+        >>> ei.opposite_endian = torchaudio.get_sox_bool(0)
+
+    """
+    ei = _torchaudio.sox_encodinginfo_t()
+    sdo = get_sox_option_t(2)  # sox_default_option
+    ei.reverse_bytes = sdo
+    ei.reverse_nibbles = sdo
+    ei.reverse_bits = sdo
+    return ei
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def get_sox_encoding_t(i: int = None) -> EncodingInfo:
+    r"""Get enum of sox_encoding_t for sox encodings.
+
+    Args:
+        i (int, optional): Choose type or get a dict with all possible options
+            use ``__members__`` to see all options when not specified. (Default: ``None``)
+
+    Returns:
+        sox_encoding_t: A sox_encoding_t type for output encoding
+    """
+    if i is None:
+        # one can see all possible values using the .__members__ attribute
+        return _torchaudio.sox_encoding_t
+    else:
+        return _torchaudio.sox_encoding_t(i)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def get_sox_option_t(i: int = 2) -> Any:
+    r"""Get enum of sox_option_t for sox encodinginfo options.
+
+    Args:
+        i (int, optional): Choose type or get a dict with all possible options
+            use ``__members__`` to see all options when not specified.
+            (Default: ``sox_option_default`` or ``2``)
+    Returns:
+        sox_option_t: A sox_option_t type
+    """
+    if i is None:
+        return _torchaudio.sox_option_t
+    else:
+        return _torchaudio.sox_option_t(i)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def get_sox_bool(i: int = 0) -> Any:
+    r"""Get enum of sox_bool for sox encodinginfo options.
+
+    Args:
+        i (int, optional): Choose type or get a dict with all possible options
+            use ``__members__`` to see all options when not specified. (Default:
+            ``sox_false`` or ``0``)
+
+    Returns:
+        sox_bool: A sox_bool type
+    """
+    if i is None:
+        return _torchaudio.sox_bool
+    else:
+        return _torchaudio.sox_bool(i)