Commit 87eca36d authored by hwangjeff, committed by Facebook GitHub Bot
Browse files

Make buffer size configurable in ffmpeg file object operations and set size in backend (#2810)

Summary:
Partly addresses https://github.com/pytorch/audio/issues/2686 and https://github.com/pytorch/audio/issues/2356.

Currently, when the buffer used for file object decoding is insufficiently large, `torchaudio.load` returns a shorter waveform than expected. To deal with this, the user is expected to increase the buffer size via `torchaudio.utils.sox_utils.set_buffer_size`, but this does not influence the buffer used by the FFmpeg fallback. To fix this, this PR applies the buffer size configured for the SoX backend (read back with `torchaudio.utils.sox_utils.get_buffer_size`) to the FFmpeg fallback as well.

As a follow-up, we should see whether it's possible to programmatically detect that the buffer is too small and flag this to the user.

Pull Request resolved: https://github.com/pytorch/audio/pull/2810

Reviewed By: mthrok

Differential Revision: D40906978

Pulled By: hwangjeff

fbshipit-source-id: 256fe1da8b21610b05bea9a0e043f484f9ea2e76
parent 6318c81f
......@@ -4,6 +4,7 @@ from typing import Optional, Tuple
import torch
import torchaudio
from torchaudio._internal import module_utils as _mod_utils
from torchaudio.utils.sox_utils import get_buffer_size
from .common import AudioMetaData
......@@ -91,12 +92,13 @@ def info(
# The previous libsox-based implementation required `format="mp3"`
# because internally libsox does not auto-detect the format.
# For the special BC for mp3, we handle mp3 differently.
buffer_size = get_buffer_size()
if format == "mp3":
return _fallback_info_fileobj(filepath, format)
return _fallback_info_fileobj(filepath, format, buffer_size)
sinfo = torchaudio._torchaudio.get_info_fileobj(filepath, format)
if sinfo is not None:
return AudioMetaData(*sinfo)
return _fallback_info_fileobj(filepath, format)
return _fallback_info_fileobj(filepath, format, buffer_size)
filepath = os.fspath(filepath)
sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
if sinfo is not None:
......@@ -210,14 +212,31 @@ def load(
# The previous libsox-based implementation required `format="mp3"`
# because internally libsox does not auto-detect the format.
# For the special BC for mp3, we handle mp3 differently.
buffer_size = get_buffer_size()
if format == "mp3":
return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
return _fallback_load_fileobj(
filepath,
frame_offset,
num_frames,
normalize,
channels_first,
format,
buffer_size,
)
ret = torchaudio._torchaudio.load_audio_fileobj(
filepath, frame_offset, num_frames, normalize, channels_first, format
)
if ret is not None:
return ret
return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
return _fallback_load_fileobj(
filepath,
frame_offset,
num_frames,
normalize,
channels_first,
format,
buffer_size,
)
filepath = os.fspath(filepath)
ret = torch.ops.torchaudio.sox_io_load_audio_file(
filepath, frame_offset, num_frames, normalize, channels_first, format
......@@ -385,10 +404,24 @@ def save(
if not torch.jit.is_scripting():
if hasattr(filepath, "write"):
torchaudio._torchaudio.save_audio_fileobj(
filepath, src, sample_rate, channels_first, compression, format, encoding, bits_per_sample
filepath,
src,
sample_rate,
channels_first,
compression,
format,
encoding,
bits_per_sample,
)
return
filepath = os.fspath(filepath)
torch.ops.torchaudio.sox_io_save_audio_file(
filepath, src, sample_rate, channels_first, compression, format, encoding, bits_per_sample
filepath,
src,
sample_rate,
channels_first,
compression,
format,
encoding,
bits_per_sample,
)
......@@ -36,8 +36,9 @@ def info_audio(
def info_audio_fileobj(
    src,
    format: Optional[str],
    buffer_size: int = 4096,
) -> AudioMetaData:
    """Return metadata for audio read from a file-like object.

    Args:
        src: Object handed to ``StreamReaderFileObj`` as the audio source.
        format: Optional format hint forwarded to ``StreamReaderFileObj``.
        buffer_size: Buffer size forwarded to ``StreamReaderFileObj``.
            Defaults to 4096, the value that was previously hard-coded.

    Returns:
        The ``AudioMetaData`` produced by ``_info_audio`` for the stream.
    """
    # Build the reader exactly once, using the caller-supplied buffer size
    # rather than a fixed 4096.
    s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
    return _info_audio(s)
......@@ -110,6 +111,7 @@ def load_audio_fileobj(
convert: bool = True,
channels_first: bool = True,
format: Optional[str] = None,
buffer_size: int = 4096,
) -> Tuple[torch.Tensor, int]:
s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, 4096)
s = torchaudio._torchaudio_ffmpeg.StreamReaderFileObj(src, format, None, buffer_size)
return _load_audio(s, frame_offset, num_frames, convert, channels_first)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment