Extract libsox integration from libtorchaudio (#2929)

Summary: This commit makes the following changes to the C++ library organization: - Move sox-related feature implementations from `libtorchaudio` to `libtorchaudio_sox`. - Remove the C++ implementations of `is_sox_available` and `is_ffmpeg_available`, as it is now sufficient to check for the existence of `libtorchaudio_sox` and `libtorchaudio_ffmpeg` to determine availability. This makes `libtorchaudio_sox` and `libtorchaudio_ffmpeg` independent of `libtorchaudio`. - Move PyBind11-based bindings (`_torchaudio_sox`, `_torchaudio_ffmpeg`) into `torchaudio.lib` so that the built library structure is less cluttered. Background: Originally, `libsox` was the only C++ extension, and `libtorchaudio` was supposed to contain all the C++ code. Things are different now: we have a bunch of C++ extensions, and we need to make the code/build structure more modular. The new `libtorchaudio_sox` contains the implementations and `_torchaudio_sox` contains the PyBind11-based bindings. Pull Request resolved: https://github.com/pytorch/audio/pull/2929 Reviewed By: hwangjeff Differential Revision: D42159594 Pulled By: mthrok fbshipit-source-id: 1a0fbca9e4143137f6363fc001b2378ce6029aa7

Extract libsox integration from libtorchaudio (#2929)
Summary: This commit makes the following changes to the C++ library organization: - Move sox-related feature implementations from `libtorchaudio` to `libtorchaudio_sox`. - Remove the C++ implementations of `is_sox_available` and `is_ffmpeg_available`, as it is now sufficient to check for the existence of `libtorchaudio_sox` and `libtorchaudio_ffmpeg` to determine availability. This makes `libtorchaudio_sox` and `libtorchaudio_ffmpeg` independent of `libtorchaudio`. - Move PyBind11-based bindings (`_torchaudio_sox`, `_torchaudio_ffmpeg`) into `torchaudio.lib` so that the built library structure is less cluttered. Background: Originally, `libsox` was the only C++ extension, and `libtorchaudio` was supposed to contain all the C++ code. Things are different now: we have a bunch of C++ extensions, and we need to make the code/build structure more modular. The new `libtorchaudio_sox` contains the implementations and `_torchaudio_sox` contains the PyBind11-based bindings. Pull Request resolved: https://github.com/pytorch/audio/pull/2929 Reviewed By: hwangjeff Differential Revision: D42159594 Pulled By: mthrok fbshipit-source-id: 1a0fbca9e4143137f6363fc001b2378ce6029aa7
1706a72f · moto · Facebook GitHub Bot · c6bc65fd · 1706a72f · 1706a72f
Commit 1706a72f authored Dec 21, 2022 by moto Committed by Facebook GitHub Bot Dec 21, 2022
16 changed files
--- a/cmake/TorchAudioHelper.cmake
+++ b/cmake/TorchAudioHelper.cmake
@@ -52,8 +52,8 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
    endif()
    install(
      TARGETS ${name}
-      LIBRARY DESTINATION .
+      LIBRARY DESTINATION lib
-      RUNTIME DESTINATION .  # For Windows
+      RUNTIME DESTINATION lib  # For Windows
      )
  endfunction()
 endif()
--- a/test/torchaudio_unittest/backend/sox_io/load_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/load_test.py
@@ -325,7 +325,7 @@ class TestLoadParams(TempDirMixin, PytorchTestCase):
        # test file-like obj
        def func(path, *args):
            with open(path, "rb") as fileobj:
-                return torchaudio._torchaudio.load_audio_fileobj(fileobj, *args)
+                return torchaudio.lib._torchaudio_sox.load_audio_fileobj(fileobj, *args)
        self._test(func, frame_offset, num_frames, channels_first, normalize)

--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -47,21 +47,28 @@ _TORCH_CUDA_ARCH_LIST = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
 def get_ext_modules():
    modules = [
        Extension(name="torchaudio.lib.libtorchaudio", sources=[]),
-        Extension(name="torchaudio._torchaudio", sources=[]),
+        Extension(name="torchaudio.lib._torchaudio", sources=[]),
    ]
+    if _BUILD_SOX:
+        modules.extend(
+            [
+                Extension(name="torchaudio.lib.libtorchaudio_sox", sources=[]),
+                Extension(name="torchaudio.lib._torchaudio_sox", sources=[]),
+            ]
+        )
    if _BUILD_CTC_DECODER:
        modules.extend(
            [
                Extension(name="torchaudio.lib.libflashlight-text", sources=[]),
-                Extension(name="torchaudio.flashlight_lib_text_decoder", sources=[]),
+                Extension(name="torchaudio.lib.flashlight_lib_text_decoder", sources=[]),
-                Extension(name="torchaudio.flashlight_lib_text_dictionary", sources=[]),
+                Extension(name="torchaudio.lib.flashlight_lib_text_dictionary", sources=[]),
            ]
        )
    if _USE_FFMPEG:
        modules.extend(
            [
                Extension(name="torchaudio.lib.libtorchaudio_ffmpeg", sources=[]),
-                Extension(name="torchaudio._torchaudio_ffmpeg", sources=[]),
+                Extension(name="torchaudio.lib._torchaudio_ffmpeg", sources=[]),
            ]
        )
    return modules
@@ -84,10 +91,16 @@ class CMakeBuild(build_ext):
        # However, the following `cmake` command will build all of them at the same time,
        # so, we do not need to perform `cmake` twice.
        # Therefore we call `cmake` only for `torchaudio.lib.libtorchaudio`.
-        if ext.name != "torchaudio._torchaudio":
+        if ext.name != "torchaudio.lib.libtorchaudio":
            return
-        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
+        # Note:
+        # the last part "lib" does not really matter. We want to get the full path of
+        # the root build directory. Passing "torchaudio" will be interpreted as
+        # `torchaudio.[so|dylib|pyd]`, so we need something `torchaudio.foo`, that is
+        # interpreted as `torchaudio/foo.so` then use dirname to get the `torchaudio`
+        # directory.
+        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath("torchaudio.lib")))
        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):

--- a/torchaudio/_extension.py
+++ b/torchaudio/_extension.py
+import logging
 import os
 import sys
-import warnings
 from pathlib import Path
 import torch
-from torchaudio._internal import module_utils as _mod_utils  # noqa: F401
+from torchaudio._internal.module_utils import is_module_available
 _LIB_DIR = Path(__file__).parent / "lib"
+_LG = logging.getLogger(__name__)
 def _get_lib_path(lib: str):
    suffix = "pyd" if os.name == "nt" else "so"
@@ -62,7 +65,7 @@ def _init_ffmpeg():
    if _FFMPEG_INITIALIZED:
        return
-    if not torch.ops.torchaudio.is_ffmpeg_available():
+    if not is_module_available("torchaudio.lib._torchaudio_ffmpeg"):
        raise RuntimeError(
            "torchaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling torchaudio."
        )
@@ -72,7 +75,7 @@ def _init_ffmpeg():
    except OSError as err:
        raise ImportError("FFmpeg libraries are not found. Please install FFmpeg.") from err
-    import torchaudio._torchaudio_ffmpeg  # noqa
+    import torchaudio.lib._torchaudio_ffmpeg  # noqa
    torch.ops.torchaudio.ffmpeg_init()
    if torch.ops.torchaudio.ffmpeg_get_log_level() > 8:
@@ -82,10 +85,6 @@ def _init_ffmpeg():
 def _init_extension():
-    if not _mod_utils.is_module_available("torchaudio._torchaudio"):
-        warnings.warn("torchaudio C++ extension is not available.")
-        return
    # On Windows Python-3.8+ has `os.add_dll_directory` call,
    # which is called to configure dll search path.
    # To find cuda related dlls we need to make sure the
@@ -102,19 +101,27 @@ def _init_extension():
                except Exception:
                    pass
+    if is_module_available("torchaudio.lib._torchaudio"):
+        try:
            _load_lib("libtorchaudio")
-    # This import is for initializing the methods registered via PyBind11
+            import torchaudio.lib._torchaudio  # noqa
-    # This has to happen after the base library is loaded
+        except Exception:
-    from torchaudio import _torchaudio  # noqa
+            _LG.debug("Failed to initialize libtorchaudio", exc_info=True)
+    if is_module_available("torchaudio.lib._torchaudio_sox"):
+        try:
+            _load_lib("libtorchaudio_sox")
+            import torchaudio.lib._torchaudio_sox  # noqa
+        except Exception:
+            _LG.debug("Failed to initialize libsox bindings", exc_info=True)
-    # Because this part is executed as part of `import torchaudio`, we ignore the
-    # initialization failure.
    # If the FFmpeg integration is not properly initialized, then detailed error
    # will be raised when client code attempts to import the dedicated feature.
+    if is_module_available("torchaudio.lib._torchaudio_ffmpeg"):
        try:
            _init_ffmpeg()
        except Exception:
-        pass
+            _LG.debug("Failed to initialize ffmpeg bindings", exc_info=True)
 def _check_cuda_version():

--- a/torchaudio/_internal/module_utils.py
+++ b/torchaudio/_internal/module_utils.py
@@ -67,7 +67,10 @@ def deprecated(direction: str, version: Optional[str] = None):
 def is_kaldi_available():
-    return is_module_available("torchaudio._torchaudio") and torch.ops.torchaudio.is_kaldi_available()
+    try:
+        return torch.ops.torchaudio.is_kaldi_available()
+    except Exception:
+        return False
 def requires_kaldi():
@@ -126,7 +129,7 @@ def requires_soundfile():
 def is_sox_available():
-    return is_module_available("torchaudio._torchaudio") and torch.ops.torchaudio.is_sox_available()
+    return is_module_available("torchaudio.lib._torchaudio_sox")
 def requires_sox():

--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -95,7 +95,7 @@ def info(
            buffer_size = get_buffer_size()
            if format == "mp3":
                return _fallback_info_fileobj(filepath, format, buffer_size)
-            sinfo = torchaudio._torchaudio.get_info_fileobj(filepath, format)
+            sinfo = torchaudio.lib._torchaudio_sox.get_info_fileobj(filepath, format)
            if sinfo is not None:
                return AudioMetaData(*sinfo)
            return _fallback_info_fileobj(filepath, format, buffer_size)
@@ -223,7 +223,7 @@ def load(
                    format,
                    buffer_size,
                )
-            ret = torchaudio._torchaudio.load_audio_fileobj(
+            ret = torchaudio.lib._torchaudio_sox.load_audio_fileobj(
                filepath, frame_offset, num_frames, normalize, channels_first, format
            )
            if ret is not None:
@@ -403,7 +403,7 @@ def save(
    """
    if not torch.jit.is_scripting():
        if hasattr(filepath, "write"):
-            torchaudio._torchaudio.save_audio_fileobj(
+            torchaudio.lib._torchaudio_sox.save_audio_fileobj(
                filepath,
                src,
                sample_rate,

--- a/torchaudio/csrc/CMakeLists.txt
+++ b/torchaudio/csrc/CMakeLists.txt
@@ -78,28 +78,6 @@ if(BUILD_KALDI)
  list(APPEND LIBTORCHAUDIO_COMPILE_DEFINITIONS INCLUDE_KALDI)
 endif()
-if(BUILD_SOX)
-  list(
-    APPEND
-    LIBTORCHAUDIO_LINK_LIBRARIES
-    libsox
-    )
-  list(
-    APPEND
-    LIBTORCHAUDIO_SOURCES
-    sox/io.cpp
-    sox/utils.cpp
-    sox/effects.cpp
-    sox/effects_chain.cpp
-    sox/types.cpp
-    )
-  list(
-    APPEND
-    LIBTORCHAUDIO_COMPILE_DEFINITIONS
-    INCLUDE_SOX
-    )
-endif()
 if(OpenMP_CXX_FOUND)
  list(
    APPEND
@@ -108,14 +86,6 @@ if(OpenMP_CXX_FOUND)
    )
 endif()
-if(USE_FFMPEG)
-  list(
-    APPEND
-    LIBTORCHAUDIO_COMPILE_DEFINITIONS
-    USE_FFMPEG
-    )
-endif()
 #------------------------------------------------------------------------------#
 # END OF CUSTOMIZATION LOGICS
 #------------------------------------------------------------------------------#
@@ -134,6 +104,27 @@ else()
  set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libtorchaudio -Wl,--as-needed CACHE INTERNAL "")
 endif()
+################################################################################
+# libtorchaudio_sox
+################################################################################
+if (BUILD_SOX)
+  set(
+    libtorchaudio_sox_sources
+    sox/io.cpp
+    sox/utils.cpp
+    sox/effects.cpp
+    sox/effects_chain.cpp
+    sox/types.cpp
+    )
+  torchaudio_library(
+    libtorchaudio_sox
+    "${libtorchaudio_sox_sources}"
+    "${LIBTORCHAUDIO_INCLUDE_DIRS}"
+    "torch;libsox"
+    "${LIBTORCHAUDIO_COMPILE_DEFINITIONS}"
+    )
+endif()
 ################################################################################
 # libtorchaudio_ffmpeg
 ################################################################################
@@ -167,37 +158,37 @@ if(USE_FFMPEG)
 endif()
 ################################################################################
-# TODO: Rename this to _torchaudio_sox.so
+# Python extensions
-# _torchaudio.so
 ################################################################################
 if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
  set(
-    EXTENSION_SOURCES
+    extension_sources
-    sox/pybind/pybind.cpp
+    pybind/pybind.cpp
+    )
+  torchaudio_extension(
+    _torchaudio
+    "${extension_sources}"
+    ""
+    "libtorchaudio"
+    ""
    )
-  #----------------------------------------------------------------------------#
-  # START OF CUSTOMIZATION LOGICS
-  #----------------------------------------------------------------------------#
  if(BUILD_SOX)
-    list(
+    set(
-      APPEND
+      sox_extension_sources
-      EXTENSION_SOURCES
+      sox/pybind/pybind.cpp
      sox/pybind/effects.cpp
      sox/pybind/effects_chain.cpp
      sox/pybind/io.cpp
      sox/pybind/utils.cpp
      )
-  endif()
-  #----------------------------------------------------------------------------#
-  # END OF CUSTOMIZATION LOGICS
-  #----------------------------------------------------------------------------#
    torchaudio_extension(
-    _torchaudio
+      _torchaudio_sox
-    "${EXTENSION_SOURCES}"
+      "${sox_extension_sources}"
+      ""
+      "libtorchaudio_sox"
      ""
-    libtorchaudio
-    "${LIBTORCHAUDIO_COMPILE_DEFINITIONS}"
      )
+  endif()
  if(USE_FFMPEG)
    set(
      FFMPEG_EXTENSION_SOURCES

--- a/torchaudio/csrc/pybind/pybind.cpp
+++ b/torchaudio/csrc/pybind/pybind.cpp
+#include <torch/extension.h>
+#include <torchaudio/csrc/utils.h>
+namespace torchaudio {
+namespace {
+// Note
+// These functions are not intended for real use cases.
+// They are accessible via TorchBind.
+// It is beneficial to have _torchaudio linked to libtorchaudio
+// when torchaudio is deployed in PEX format, where the library is not
+// located in torchaudio/lib but somewhere in LD_LIBRARY_PATH.
+// In this case, an attempt to import _torchaudio will automatically resolve
+// libtorchaudio, if _torchaudio is linked to libtorchaudio.
+PYBIND11_MODULE(_torchaudio, m) {
+  m.def("is_kaldi_available", &is_kaldi_available, "");
+  m.def("cuda_version", &cuda_version, "");
+}
+} // namespace
+} // namespace torchaudio
--- a/torchaudio/csrc/sox/pybind/pybind.cpp
+++ b/torchaudio/csrc/sox/pybind/pybind.cpp
 #include <torch/extension.h>
-#ifdef INCLUDE_SOX
 #include <torchaudio/csrc/sox/pybind/effects.h>
 #include <torchaudio/csrc/sox/pybind/io.h>
-#endif
-PYBIND11_MODULE(_torchaudio, m) {
+PYBIND11_MODULE(_torchaudio_sox, m) {
-#ifdef INCLUDE_SOX
  m.def(
      "get_info_fileobj",
      &torchaudio::sox_io::get_info_fileobj,
@@ -23,5 +20,4 @@ PYBIND11_MODULE(_torchaudio, m) {
      "apply_effects_fileobj",
      &torchaudio::sox_effects::apply_effects_fileobj,
      "Decode audio data from file-like obj and apply effects.");
-#endif
 }
--- a/torchaudio/csrc/utils.cpp
+++ b/torchaudio/csrc/utils.cpp
 #include <torch/script.h>
+#include <torchaudio/csrc/utils.h>
 #ifdef USE_CUDA
 #include <cuda.h>
@@ -6,16 +7,6 @@
 namespace torchaudio {
-namespace {
-bool is_sox_available() {
-#ifdef INCLUDE_SOX
-  return true;
-#else
-  return false;
-#endif
-}
 bool is_kaldi_available() {
 #ifdef INCLUDE_KALDI
  return true;
@@ -24,16 +15,6 @@ bool is_kaldi_available() {
 #endif
 }
-// It tells whether torchaudio was compiled with ffmpeg
-// not the runtime availability.
-bool is_ffmpeg_available() {
-#ifdef USE_FFMPEG
-  return true;
-#else
-  return false;
-#endif
-}
 c10::optional<int64_t> cuda_version() {
 #ifdef USE_CUDA
  return CUDA_VERSION;
@@ -41,14 +22,11 @@ c10::optional<int64_t> cuda_version() {
  return {};
 #endif
 }
+namespace {
-} // namespace
 TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
-  m.def("torchaudio::is_sox_available", &is_sox_available);
  m.def("torchaudio::is_kaldi_available", &is_kaldi_available);
-  m.def("torchaudio::is_ffmpeg_available", &is_ffmpeg_available);
  m.def("torchaudio::cuda_version", &cuda_version);
 }
+} // namespace
 } // namespace torchaudio
--- a/torchaudio/csrc/utils.h
+++ b/torchaudio/csrc/utils.h
+#pragma once
+#include <torch/torch.h>
+namespace torchaudio {
+bool is_kaldi_available();
+c10::optional<int64_t> cuda_version();
+} // namespace torchaudio
--- a/torchaudio/io/_compat.py
+++ b/torchaudio/io/_compat.py
@@ -38,7 +38,7 @@ def info_audio_fileobj(
    format: Optional[str],
    buffer_size: int = 4096,
 ) -> AudioMetaData:
-    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
+    s = torchaudio.lib._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
    return _info_audio(s)
@@ -113,5 +113,5 @@ def load_audio_fileobj(
    format: Optional[str] = None,
    buffer_size: int = 4096,
 ) -> Tuple[torch.Tensor, int]:
-    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
+    s = torchaudio.lib._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
    return _load_audio(s, frame_offset, num_frames, convert, channels_first)
--- a/torchaudio/io/_stream_reader.py
+++ b/torchaudio/io/_stream_reader.py
@@ -361,7 +361,7 @@ class StreamReader:
        elif isinstance(src, torch.Tensor):
            self._be = torch.classes.torchaudio.ffmpeg_StreamReaderTensor(src, format, option, buffer_size)
        elif hasattr(src, "read"):
-            self._be = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, option, buffer_size)
+            self._be = torchaudio.lib._torchaudio_ffmpeg.StreamReaderFileObj(src, format, option, buffer_size)
        else:
            raise ValueError("`src` must be either string, Tensor or file-like object.")

--- a/torchaudio/io/_stream_writer.py
+++ b/torchaudio/io/_stream_writer.py
@@ -108,7 +108,7 @@ class StreamWriter:
        if isinstance(dst, str):
            self._s = torch.classes.torchaudio.ffmpeg_StreamWriter(dst, format)
        elif hasattr(dst, "write"):
-            self._s = torchaudio._torchaudio_ffmpeg.StreamWriterFileObj(dst, format, buffer_size)
+            self._s = torchaudio.lib._torchaudio_ffmpeg.StreamWriterFileObj(dst, format, buffer_size)
        else:
            raise ValueError("`dst` must be either a string or a file-like object.")
        self._is_open = False

--- a/torchaudio/models/decoder/_ctc_decoder.py
+++ b/torchaudio/models/decoder/_ctc_decoder.py
@@ -33,7 +33,7 @@ try:
    )
 except Exception:
    torchaudio._extension._load_lib("libflashlight-text")
-    from torchaudio.flashlight_lib_text_decoder import (
+    from torchaudio.lib.flashlight_lib_text_decoder import (
        CriterionType as _CriterionType,
        KenLM as _KenLM,
        LexiconDecoder as _LexiconDecoder,
@@ -46,7 +46,7 @@ except Exception:
        Trie as _Trie,
        ZeroLM as _ZeroLM,
    )
-    from torchaudio.flashlight_lib_text_dictionary import (
+    from torchaudio.lib.flashlight_lib_text_dictionary import (
        create_word_dict as _create_word_dict,
        Dictionary as _Dictionary,
        load_words as _load_words,

--- a/torchaudio/sox_effects/sox_effects.py
+++ b/torchaudio/sox_effects/sox_effects.py
@@ -274,7 +274,7 @@ def apply_effects_file(
    """
    if not torch.jit.is_scripting():
        if hasattr(path, "read"):
-            ret = torchaudio._torchaudio.apply_effects_fileobj(path, effects, normalize, channels_first, format)
+            ret = torchaudio.lib._torchaudio_sox.apply_effects_fileobj(path, effects, normalize, channels_first, format)
            if ret is None:
                raise RuntimeError("Failed to load audio from {}".format(path))
            return ret