Make buffer size configurable in ffmpeg file object operations and set size in backend

71ddee16 · hwangjeff · 89e28623 · 71ddee16 · 71ddee16
Commit 71ddee16 authored Oct 31, 2022 by hwangjeff
Show whitespace changes
Inline Side-by-side

Showing with 43 additions and 8 deletions

torchaudio/backend/sox_io_backend.py +39 -6

torchaudio/io/_compat.py +4 -2

No files found.
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -4,6 +4,7 @@ from typing import Optional, Tuple
 import torch
 import torchaudio
 from torchaudio._internal import module_utils as _mod_utils
+from torchaudio.utils.sox_utils import get_buffer_size
 from .common import AudioMetaData
@@ -91,12 +92,13 @@ def info(
            # The previous libsox-based implementation required `format="mp3"`
            # because internally libsox does not auto-detect the format.
            # For the special BC for mp3, we handle mp3 differently.
+            buffer_size = get_buffer_size()
            if format == "mp3":
-                return _fallback_info_fileobj(filepath, format)
+                return _fallback_info_fileobj(filepath, format, buffer_size)
            sinfo = torchaudio._torchaudio.get_info_fileobj(filepath, format)
            if sinfo is not None:
                return AudioMetaData(*sinfo)
-            return _fallback_info_fileobj(filepath, format)
+            return _fallback_info_fileobj(filepath, format, buffer_size)
        filepath = os.fspath(filepath)
    sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
    if sinfo is not None:
@@ -210,14 +212,31 @@ def load(
            # The previous libsox-based implementation required `format="mp3"`
            # because internally libsox does not auto-detect the format.
            # For the special BC for mp3, we handle mp3 differently.
+            buffer_size = get_buffer_size()
            if format == "mp3":
-                return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
+                return _fallback_load_fileobj(
+                    filepath,
+                    frame_offset,
+                    num_frames,
+                    normalize,
+                    channels_first,
+                    format,
+                    buffer_size,
+                )
            ret = torchaudio._torchaudio.load_audio_fileobj(
                filepath, frame_offset, num_frames, normalize, channels_first, format
            )
            if ret is not None:
                return ret
-            return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
+            return _fallback_load_fileobj(
+                filepath,
+                frame_offset,
+                num_frames,
+                normalize,
+                channels_first,
+                format,
+                buffer_size,
+            )
        filepath = os.fspath(filepath)
    ret = torch.ops.torchaudio.sox_io_load_audio_file(
        filepath, frame_offset, num_frames, normalize, channels_first, format
@@ -385,10 +404,24 @@ def save(
    if not torch.jit.is_scripting():
        if hasattr(filepath, "write"):
            torchaudio._torchaudio.save_audio_fileobj(
-                filepath, src, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+                filepath,
+                src,
+                sample_rate,
+                channels_first,
+                compression,
+                format,
+                encoding,
+                bits_per_sample,
            )
            return
        filepath = os.fspath(filepath)
    torch.ops.torchaudio.sox_io_save_audio_file(
-        filepath, src, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+        filepath,
+        src,
+        sample_rate,
+        channels_first,
+        compression,
+        format,
+        encoding,
+        bits_per_sample,
    )
--- a/torchaudio/io/_compat.py
+++ b/torchaudio/io/_compat.py
@@ -36,8 +36,9 @@ def info_audio(
 def info_audio_fileobj(
    src,
    format: Optional[str],
+    buffer_size: int = 4096,
 ) -> AudioMetaData:
-    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, 4096)
+    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
    return _info_audio(s)
@@ -110,6 +111,7 @@ def load_audio_fileobj(
    convert: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
+    buffer_size: int = 4096,
 ) -> Tuple[torch.Tensor, int]:
-    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, 4096)
+    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
    return _load_audio(s, frame_offset, num_frames, convert, channels_first)