Unverified commit 36f5010b, authored by moto-meta, committed by GitHub
Browse files

Swap decoder/encoder implementation

Differential Revision: D50677606

Pull Request resolved: https://github.com/pytorch/audio/pull/3681
parent 2a0f4c06
...@@ -43,7 +43,7 @@ endfunction() ...@@ -43,7 +43,7 @@ endfunction()
function(torio_library name source include_dirs link_libraries compile_defs) function(torio_library name source include_dirs link_libraries compile_defs)
_library( _library(
torchaudio/lib torio/lib
"${name}" "${name}"
"${source}" "${source}"
"${include_dirs}" "${include_dirs}"
...@@ -104,7 +104,7 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION) ...@@ -104,7 +104,7 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
endfunction() endfunction()
function(torio_extension name sources include_dirs libraries definitions) function(torio_extension name sources include_dirs libraries definitions)
_extension( _extension(
torchaudio/lib torio/lib
"${name}" "${name}"
"${sources}" "${sources}"
"${include_dirs}" "${include_dirs}"
......
...@@ -73,7 +73,7 @@ Support Structures ...@@ -73,7 +73,7 @@ Support Structures
{{ item | underline("~") }} {{ item | underline("~") }}
.. autoclass:: torchaudio.io._stream_reader.{{item}}() .. autoclass:: torio.io._streaming_media_decoder.{{item}}()
:members: :members:
{%- endfor %} {%- endfor %}
......
...@@ -385,6 +385,13 @@ def inject_minigalleries(app, what, name, obj, options, lines): ...@@ -385,6 +385,13 @@ def inject_minigalleries(app, what, name, obj, options, lines):
def setup(app): def setup(app):
app.connect("autodoc-process-docstring", inject_minigalleries) app.connect("autodoc-process-docstring", inject_minigalleries)
from torchaudio.io import StreamReader, StreamWriter
# need to assign the names here, otherwise autodoc won't document these classes,
# and will instead just say 'alias of ...'
StreamReader.__name__ = "StreamReader"
StreamWriter.__name__ = "StreamWriter"
from custom_directives import CustomCardEnd, CustomCardItem, CustomCardStart, SupportedDevices, SupportedProperties from custom_directives import CustomCardEnd, CustomCardItem, CustomCardStart, SupportedDevices, SupportedProperties
......
...@@ -4,8 +4,9 @@ from typing import BinaryIO, Dict, Optional, Tuple, Type, Union ...@@ -4,8 +4,9 @@ from typing import BinaryIO, Dict, Optional, Tuple, Type, Union
import torch import torch
from torchaudio._extension import lazy_import_ffmpeg_ext, lazy_import_sox_ext from torchaudio._extension import lazy_import_sox_ext
from torchaudio.io import CodecConfig from torchaudio.io import CodecConfig
from torio._extension import lazy_import_ffmpeg_ext
from . import soundfile_backend from . import soundfile_backend
......
...@@ -4,7 +4,7 @@ import sys ...@@ -4,7 +4,7 @@ import sys
from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op
from .utils import _check_cuda_version, _init_dll_path, _init_ffmpeg, _init_sox, _LazyImporter, _load_lib from .utils import _check_cuda_version, _init_dll_path, _init_sox, _LazyImporter, _load_lib
_LG = logging.getLogger(__name__) _LG = logging.getLogger(__name__)
...@@ -18,7 +18,6 @@ __all__ = [ ...@@ -18,7 +18,6 @@ __all__ = [
"_IS_TORCHAUDIO_EXT_AVAILABLE", "_IS_TORCHAUDIO_EXT_AVAILABLE",
"_IS_RIR_AVAILABLE", "_IS_RIR_AVAILABLE",
"lazy_import_sox_ext", "lazy_import_sox_ext",
"lazy_import_ffmpeg_ext",
] ]
...@@ -57,18 +56,6 @@ def lazy_import_sox_ext(): ...@@ -57,18 +56,6 @@ def lazy_import_sox_ext():
return _SOX_EXT return _SOX_EXT
_FFMPEG_EXT = None
def lazy_import_ffmpeg_ext():
    """Return the lazily-imported FFmpeg extension, creating it on first use.

    The ``_LazyImporter`` wrapper is constructed once and cached in the
    module-level ``_FFMPEG_EXT``; later calls return that same object.
    """
    global _FFMPEG_EXT
    if _FFMPEG_EXT is not None:
        return _FFMPEG_EXT
    _FFMPEG_EXT = _LazyImporter("_torchaudio_ffmpeg", _init_ffmpeg)
    return _FFMPEG_EXT
fail_if_no_rir = ( fail_if_no_rir = (
no_op no_op
if _IS_RIR_AVAILABLE if _IS_RIR_AVAILABLE
......
...@@ -4,7 +4,6 @@ The implementations here should be stateless. ...@@ -4,7 +4,6 @@ The implementations here should be stateless.
They should not depend on external state. They should not depend on external state.
Anything that depends on external state should happen in __init__.py Anything that depends on external state should happen in __init__.py
""" """
import importlib import importlib
import logging import logging
import os import os
...@@ -107,57 +106,6 @@ def _init_sox(): ...@@ -107,57 +106,6 @@ def _init_sox():
return ext return ext
_FFMPEG_VERS = ["6", "5", "4", ""]
def _find_versionsed_ffmpeg_extension(version: str):
    """Import the FFmpeg extension module for one version suffix.

    Args:
        version: Suffix appended to the extension and library names
            (an empty string selects the unsuffixed names).

    Returns:
        The imported ``torchaudio.lib._torio_ffmpeg{version}`` module.

    Raises:
        RuntimeError: If no import spec can be found for the extension module.
    """
    _LG.debug("Attempting to load FFmpeg%s", version)
    module_name = f"torchaudio.lib._torio_ffmpeg{version}"
    if importlib.util.find_spec(module_name):
        # The matching shared library is handed to ``_load_lib`` before the
        # Python extension module itself is imported.
        _load_lib(f"libtorio_ffmpeg{version}")
        return importlib.import_module(module_name)
    raise RuntimeError(f"FFmpeg{version} extension is not available.")
def _find_ffmpeg_extension(ffmpeg_vers):
    """Return the first FFmpeg extension that loads, trying versions in order.

    Args:
        ffmpeg_vers: Iterable of version suffixes to attempt, in priority order.

    Returns:
        The extension module for the first version that loads successfully.

    Raises:
        ImportError: If every candidate version fails to load.
    """
    for candidate in ffmpeg_vers:
        try:
            extension = _find_versionsed_ffmpeg_extension(candidate)
        except Exception:
            # Best effort: record why this version failed and fall through to
            # the next candidate.
            _LG.debug("Failed to load FFmpeg%s extension.", candidate, exc_info=True)
        else:
            return extension
    raise ImportError(
        f"Failed to intialize FFmpeg extension. Tried versions: {ffmpeg_vers}. "
        "Enable DEBUG logging to see more details about the error."
    )
def _get_ffmpeg_versions():
    """Return the FFmpeg version suffixes to try, honoring the user override.

    Without an override the module-level ``_FFMPEG_VERS`` list is returned.
    When the ``TORCHAUDIO_USE_FFMPEG_VERSION`` environment variable is set,
    only that single version is returned.

    Raises:
        ValueError: If the environment variable holds a value that is not in
            ``_FFMPEG_VERS``.
    """
    ffmpeg_vers = _FFMPEG_VERS
    requested = os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION")
    if requested is None:
        return ffmpeg_vers
    if requested not in ffmpeg_vers:
        raise ValueError(
            f"The FFmpeg version '{requested}' (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
            f"is not one of supported values. Possible values are {ffmpeg_vers}"
        )
    return [requested]
def _init_ffmpeg():
    """Locate, initialize and return the FFmpeg extension module.

    After ``init()`` is called on the extension, its log level is lowered to
    8 when the current level is above that value.
    """
    max_log_level = 8
    extension = _find_ffmpeg_extension(_get_ffmpeg_versions())
    extension.init()
    if extension.get_log_level() > max_log_level:
        extension.set_log_level(max_log_level)
    return extension
class _LazyImporter(types.ModuleType): class _LazyImporter(types.ModuleType):
"""Lazily import module/extension.""" """Lazily import module/extension."""
......
from torio.io import CodecConfig, StreamingMediaDecoder as StreamReader, StreamingMediaEncoder as StreamWriter
from ._effector import AudioEffector from ._effector import AudioEffector
from ._playback import play_audio from ._playback import play_audio
from ._stream_reader import StreamReader
from ._stream_writer import CodecConfig, StreamWriter
__all__ = [ __all__ = [
......
...@@ -4,8 +4,8 @@ from typing import Iterator, List, Optional ...@@ -4,8 +4,8 @@ from typing import Iterator, List, Optional
import torch import torch
from torch import Tensor from torch import Tensor
from ._stream_reader import _get_afilter_desc, StreamReader from torio.io._streaming_media_decoder import _get_afilter_desc, StreamingMediaDecoder as StreamReader
from ._stream_writer import CodecConfig, StreamWriter from torio.io._streaming_media_encoder import CodecConfig, StreamingMediaEncoder as StreamWriter
class _StreamingIOBuffer: class _StreamingIOBuffer:
......
from . import ffmpeg_utils, sox_utils from torio.utils import ffmpeg_utils
from . import sox_utils
from .download import download_asset from .download import download_asset
......
"""Module to change the configuration of FFmpeg libraries (such as libavformat). def __getattr__(item):
from torio.utils import ffmpeg_utils
It affects functionalities in :py:mod:`torchaudio.io` (and indirectly :py:func:`torchaudio.load`). return getattr(ffmpeg_utils, item)
"""
from typing import Dict, List, Tuple
import torchaudio
ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
def get_versions() -> Dict[str, Tuple[int, ...]]:
    """Get the versions of FFmpeg libraries

    Returns:
        dict: mapping from library names to version tuples,
        i.e. `"libavutil": (56, 22, 100)`.
    """
    # ``Tuple[int, ...]`` (variable length) is used because the documented
    # value has several components; ``Tuple[int]`` would mean a 1-tuple.
    return ffmpeg_ext.get_versions()
def get_log_level() -> int:
    """Return the current log level of FFmpeg.

    See :py:func:`set_log_level` for the meaning of the values.
    """
    level = ffmpeg_ext.get_log_level()
    return level
def set_log_level(level: int):
    """Set the log level of FFmpeg (libavformat etc)

    Arguments:
        level (int): Log level. The larger, the more verbose.

            Common values, with the corresponding ``ffmpeg`` ``-loglevel``
            option value and description:

            * ``-8`` (``quiet``):
              Print no output.
            * ``0`` (``panic``):
              Something went really wrong and we will crash now.
            * ``8`` (``fatal``):
              Something went wrong and recovery is not possible.
              For example, no header was found for a format which depends
              on headers or an illegal combination of parameters is used.
            * ``16`` (``error``):
              Something went wrong and cannot losslessly be recovered.
              However, not all future data is affected.
            * ``24`` (``warning``):
              Something somehow does not look correct.
              This may or may not lead to problems.
            * ``32`` (``info``):
              Standard information.
            * ``40`` (``verbose``):
              Detailed information.
            * ``48`` (``debug``):
              Stuff which is only useful for libav* developers.
            * ``56`` (``trace``):
              Extremely verbose debugging, useful for libav* development.
    """
    ffmpeg_ext.set_log_level(level)
def get_demuxers() -> Dict[str, str]:
    """List the demuxers reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from demuxer (format) short name to long name.

    Example
        >>> for k, v in get_demuxers().items():
        >>>     print(f"{k}: {v}")
        ... aa: Audible AA format files
        ... aac: raw ADTS AAC (Advanced Audio Coding)
        ... aax: CRI AAX
        ... ac3: raw AC-3
    """
    demuxers = ffmpeg_ext.get_demuxers()
    return demuxers
def get_muxers() -> Dict[str, str]:
    """List the muxers reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from muxer (format) short name to long name.

    Example
        >>> for k, v in get_muxers().items():
        >>>     print(f"{k}: {v}")
        ... a64: a64 - video for Commodore 64
        ... ac3: raw AC-3
        ... adts: ADTS AAC (Advanced Audio Coding)
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
    muxers = ffmpeg_ext.get_muxers()
    return muxers
def get_audio_decoders() -> Dict[str, str]:
    """List the audio decoders reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from decoder short name to long name.

    Example
        >>> for k, v in get_audio_decoders().items():
        >>>     print(f"{k}: {v}")
        ... a64: a64 - video for Commodore 64
        ... ac3: raw AC-3
        ... adts: ADTS AAC (Advanced Audio Coding)
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
    decoders = ffmpeg_ext.get_audio_decoders()
    return decoders
def get_audio_encoders() -> Dict[str, str]:
    """List the audio encoders reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from encoder short name to long name.

    Example
        >>> for k, v in get_audio_encoders().items():
        >>>     print(f"{k}: {v}")
        ... comfortnoise: RFC 3389 comfort noise generator
        ... s302m: SMPTE 302M
        ... aac: AAC (Advanced Audio Coding)
        ... ac3: ATSC A/52A (AC-3)
        ... ac3_fixed: ATSC A/52A (AC-3)
        ... alac: ALAC (Apple Lossless Audio Codec)
    """
    encoders = ffmpeg_ext.get_audio_encoders()
    return encoders
def get_video_decoders() -> Dict[str, str]:
    """List the video decoders reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from decoder short name to long name.

    Example
        >>> for k, v in get_video_decoders().items():
        >>>     print(f"{k}: {v}")
        ... aasc: Autodesk RLE
        ... aic: Apple Intermediate Codec
        ... alias_pix: Alias/Wavefront PIX image
        ... agm: Amuse Graphics Movie
        ... amv: AMV Video
        ... anm: Deluxe Paint Animation
    """
    decoders = ffmpeg_ext.get_video_decoders()
    return decoders
def get_video_encoders() -> Dict[str, str]:
    """List the video encoders reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from encoder short name to long name.

    Example
        >>> for k, v in get_video_encoders().items():
        >>>     print(f"{k}: {v}")
        ... a64multi: Multicolor charset for Commodore 64
        ... a64multi5: Multicolor charset for Commodore 64, extended with 5th color (colram)
        ... alias_pix: Alias/Wavefront PIX image
        ... amv: AMV Video
        ... apng: APNG (Animated Portable Network Graphics) image
        ... asv1: ASUS V1
        ... asv2: ASUS V2
    """
    encoders = ffmpeg_ext.get_video_encoders()
    return encoders
def get_input_devices() -> Dict[str, str]:
    """List the input devices reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from device short name to long name.

    Example
        >>> for k, v in get_input_devices().items():
        >>>     print(f"{k}: {v}")
        ... avfoundation: AVFoundation input device
        ... lavfi: Libavfilter virtual input device
    """
    devices = ffmpeg_ext.get_input_devices()
    return devices
def get_output_devices() -> Dict[str, str]:
    """List the output devices reported by the FFmpeg extension.

    Returns:
        Dict[str, str]: Mapping from device short name to long name.

    Example
        >>> for k, v in get_output_devices().items():
        >>>     print(f"{k}: {v}")
        ... audiotoolbox: AudioToolbox output device
    """
    devices = ffmpeg_ext.get_output_devices()
    return devices
def get_input_protocols() -> List[str]:
    """List the input protocols reported by the FFmpeg extension.

    Returns:
        List[str]: The names of supported input protocols

    Example
        >>> print(get_input_protocols())
        ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
    """
    protocols = ffmpeg_ext.get_input_protocols()
    return protocols
def get_output_protocols() -> List[str]:
    """List the output protocols reported by the FFmpeg extension.

    Returns:
        List[str]: The names of supported output protocols

    Example
        >>> print(get_output_protocols())
        ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
    """
    protocols = ffmpeg_ext.get_output_protocols()
    return protocols
def get_build_config() -> str:
    """Return the FFmpeg build configuration reported by the extension.

    Returns:
        str: Build configuration string.

    Example
        >>> print(get_build_config())
        --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang # noqa
    """
    build_config = ffmpeg_ext.get_build_config()
    return build_config
def clear_cuda_context_cache():
    """Clear the cached CUDA context used by CUDA hardware-accelerated video decoding.

    Delegates to the FFmpeg extension's ``clear_cuda_context_cache``.
    """
    ffmpeg_ext.clear_cuda_context_cache()
"""Module to change the configuration of FFmpeg libraries (such as libavformat). """Module to change the configuration of FFmpeg libraries (such as libavformat).
It affects functionalities in :py:mod:`torio.io`.
""" """
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
...@@ -22,7 +20,7 @@ def get_versions() -> Dict[str, Tuple[int]]: ...@@ -22,7 +20,7 @@ def get_versions() -> Dict[str, Tuple[int]]:
def get_log_level() -> int: def get_log_level() -> int:
"""Get the log level of FFmpeg. """Get the log level of FFmpeg.
See :py:func:`set_log_level` for the detailo. See :py:func:`set_log_level` for the detail.
""" """
return ffmpeg_ext.get_log_level() return ffmpeg_ext.get_log_level()
......
...@@ -10,6 +10,7 @@ from itertools import zip_longest ...@@ -10,6 +10,7 @@ from itertools import zip_longest
import torch import torch
import torchaudio import torchaudio
import torio
from torch.testing._internal.common_utils import TestCase as PytorchTestCase from torch.testing._internal.common_utils import TestCase as PytorchTestCase
from torchaudio._internal.module_utils import eval_env, is_module_available from torchaudio._internal.module_utils import eval_env, is_module_available
from torchaudio.utils.ffmpeg_utils import get_video_decoders, get_video_encoders from torchaudio.utils.ffmpeg_utils import get_video_decoders, get_video_encoders
...@@ -111,7 +112,7 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase): ...@@ -111,7 +112,7 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase):
pass pass
_IS_FFMPEG_AVAILABLE = torchaudio._extension.lazy_import_ffmpeg_ext().is_available() _IS_FFMPEG_AVAILABLE = torio._extension.lazy_import_ffmpeg_ext().is_available()
_IS_SOX_AVAILABLE = torchaudio._extension.lazy_import_sox_ext().is_available() _IS_SOX_AVAILABLE = torchaudio._extension.lazy_import_sox_ext().is_available()
_IS_CTC_DECODER_AVAILABLE = None _IS_CTC_DECODER_AVAILABLE = None
_IS_CUDA_CTC_DECODER_AVAILABLE = None _IS_CUDA_CTC_DECODER_AVAILABLE = None
......
...@@ -5,14 +5,6 @@ import torchaudio ...@@ -5,14 +5,6 @@ import torchaudio
from parameterized import parameterized, parameterized_class from parameterized import parameterized, parameterized_class
from torchaudio.io import StreamReader, StreamWriter from torchaudio.io import StreamReader, StreamWriter
from torchaudio.io._stream_reader import (
ChunkTensor,
OutputAudioStream,
OutputVideoStream,
SourceAudioStream,
SourceStream,
SourceVideoStream,
)
from torchaudio_unittest.common_utils import ( from torchaudio_unittest.common_utils import (
disabledInCI, disabledInCI,
get_asset_path, get_asset_path,
...@@ -29,6 +21,14 @@ from torchaudio_unittest.common_utils import ( ...@@ -29,6 +21,14 @@ from torchaudio_unittest.common_utils import (
TempDirMixin, TempDirMixin,
TorchaudioTestCase, TorchaudioTestCase,
) )
from torio.io._streaming_media_decoder import (
ChunkTensor,
OutputAudioStream,
OutputVideoStream,
SourceAudioStream,
SourceStream,
SourceVideoStream,
)
@skipIfNoFFmpeg @skipIfNoFFmpeg
......
...@@ -70,19 +70,19 @@ def get_ext_modules(): ...@@ -70,19 +70,19 @@ def get_ext_modules():
# single version ffmpeg mode # single version ffmpeg mode
modules.extend( modules.extend(
[ [
Extension(name="torchaudio.lib.libtorio_ffmpeg", sources=[]), Extension(name="torio.lib.libtorio_ffmpeg", sources=[]),
Extension(name="torchaudio.lib._torio_ffmpeg", sources=[]), Extension(name="torio.lib._torio_ffmpeg", sources=[]),
] ]
) )
else: else:
modules.extend( modules.extend(
[ [
Extension(name="torchaudio.lib.libtorio_ffmpeg4", sources=[]), Extension(name="torio.lib.libtorio_ffmpeg4", sources=[]),
Extension(name="torchaudio.lib._torio_ffmpeg4", sources=[]), Extension(name="torio.lib._torio_ffmpeg4", sources=[]),
Extension(name="torchaudio.lib.libtorio_ffmpeg5", sources=[]), Extension(name="torio.lib.libtorio_ffmpeg5", sources=[]),
Extension(name="torchaudio.lib._torio_ffmpeg5", sources=[]), Extension(name="torio.lib._torio_ffmpeg5", sources=[]),
Extension(name="torchaudio.lib.libtorio_ffmpeg6", sources=[]), Extension(name="torio.lib.libtorio_ffmpeg6", sources=[]),
Extension(name="torchaudio.lib._torio_ffmpeg6", sources=[]), Extension(name="torio.lib._torio_ffmpeg6", sources=[]),
] ]
) )
return modules return modules
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment