sox.py 3.28 KB
Newer Older
moto's avatar
moto committed
1
2
3
4
import os
from typing import BinaryIO, Optional, Tuple, Union

import torch
5
import torchaudio
moto's avatar
moto committed
6
7

from .backend import Backend
8
from .common import AudioMetaData
moto's avatar
moto committed
9

10
11
# Handle to the SoX extension, obtained through torchaudio's lazy-import helper.
# NOTE(review): presumably the native module is only loaded on first use — confirm.
sox_ext = torchaudio._extension.lazy_import_sox_ext()

moto's avatar
moto committed
12
13
14
15
16
17
18
19
20
21

class SoXBackend(Backend):
    """Audio I/O backend that delegates to the SoX extension (``sox_ext``).

    Only path-like URIs are handled. Every I/O method rejects file-like
    objects (detected by the presence of a ``read``/``write`` attribute) with
    a ``ValueError``.
    """

    @staticmethod
    def info(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str], buffer_size: int = 4096) -> AudioMetaData:
        """Fetch metadata for the audio at ``uri``.

        Args:
            uri: Location of the audio. Objects exposing a ``read`` attribute
                are rejected.
            format: Forwarded unchanged to ``sox_ext.get_info``.
            buffer_size: Accepted for signature compatibility; not used by
                this backend.

        Returns:
            An ``AudioMetaData`` built from the values returned by
            ``sox_ext.get_info``.

        Raises:
            ValueError: If ``uri`` is a file-like object.
            RuntimeError: If ``sox_ext.get_info`` returns a falsy result.
        """
        if hasattr(uri, "read"):
            # Adjacent literals are concatenated into ONE message. A comma
            # between them would pass two args and make str(exc) a tuple repr.
            raise ValueError(
                "SoX backend does not support reading from file-like objects. "
                "Please use an alternative backend that does support reading from file-like objects, e.g. FFmpeg."
            )

        sinfo = sox_ext.get_info(uri, format)
        if not sinfo:
            raise RuntimeError(f"Failed to fetch metadata for {uri}.")
        return AudioMetaData(*sinfo)

    @staticmethod
    def load(
        uri: Union[BinaryIO, str, os.PathLike],
        frame_offset: int = 0,
        num_frames: int = -1,
        normalize: bool = True,
        channels_first: bool = True,
        format: Optional[str] = None,
        buffer_size: int = 4096,
    ) -> Tuple[torch.Tensor, int]:
        """Load audio from ``uri`` via ``sox_ext.load_audio_file``.

        Args:
            uri: Location of the audio. Objects exposing a ``read`` attribute
                are rejected.
            frame_offset: Forwarded unchanged to the extension.
            num_frames: Forwarded unchanged to the extension.
            normalize: Forwarded unchanged to the extension.
            channels_first: Forwarded unchanged to the extension.
            format: Forwarded unchanged to the extension.
            buffer_size: Accepted for signature compatibility; not used by
                this backend.

        Returns:
            The value returned by ``sox_ext.load_audio_file``, annotated as a
            ``(Tensor, int)`` pair (presumably waveform and sample rate —
            confirm against the extension).

        Raises:
            ValueError: If ``uri`` is a file-like object.
            RuntimeError: If the extension returns a falsy result.
        """
        if hasattr(uri, "read"):
            # Single concatenated message; see the note in ``info``.
            raise ValueError(
                "SoX backend does not support loading from file-like objects. "
                "Please use an alternative backend that does support loading from file-like objects, e.g. FFmpeg."
            )

        ret = sox_ext.load_audio_file(uri, frame_offset, num_frames, normalize, channels_first, format)
        if not ret:
            raise RuntimeError(f"Failed to load audio from {uri}.")
        return ret

    @staticmethod
    def save(
        uri: Union[BinaryIO, str, os.PathLike],
        src: torch.Tensor,
        sample_rate: int,
        channels_first: bool = True,
        format: Optional[str] = None,
        encoding: Optional[str] = None,
        bits_per_sample: Optional[int] = None,
        buffer_size: int = 4096,
        compression: Optional[Union[torchaudio.io.CodecConfig, float, int]] = None,
    ) -> None:
        """Write ``src`` to ``uri`` via ``sox_ext.save_audio_file``.

        Args:
            uri: Destination. Objects exposing a ``write`` attribute are
                rejected.
            src: Tensor forwarded unchanged to the extension.
            sample_rate: Forwarded unchanged to the extension.
            channels_first: Forwarded unchanged to the extension.
            format: Forwarded unchanged to the extension.
            encoding: Forwarded unchanged to the extension.
            bits_per_sample: Forwarded unchanged to the extension.
            buffer_size: Accepted for signature compatibility; not used by
                this backend.
            compression: ``None``, ``float`` or ``int``. The annotation also
                admits ``CodecConfig`` for parity with the shared signature,
                but this backend rejects it.

        Raises:
            ValueError: If ``compression`` is not ``None``/``float``/``int``,
                or if ``uri`` is a file-like object.
        """
        if not isinstance(compression, (float, int, type(None))):
            raise ValueError(
                "SoX backend expects non-`None` value for argument `compression` to be of "
                f"type `float` or `int`, but received value of type {type(compression)}"
            )
        if hasattr(uri, "write"):
            raise ValueError(
                "SoX backend does not support writing to file-like objects. "
                "Please use an alternative backend that does support writing to file-like objects, e.g. FFmpeg."
            )

        # Positional order follows the extension's signature: compression
        # comes before format/encoding/bits_per_sample.
        sox_ext.save_audio_file(
            uri,
            src,
            sample_rate,
            channels_first,
            compression,
            format,
            encoding,
            bits_per_sample,
        )

    @staticmethod
    def can_decode(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str]) -> bool:
        """Return ``True`` unless ``uri`` is file-like (has a ``read`` attribute).

        ``format`` is accepted but not consulted.
        """
        # i.e. not a file-like object.
        return not hasattr(uri, "read")

    @staticmethod
    def can_encode(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str]) -> bool:
        """Return ``True`` unless ``uri`` is file-like (has a ``write`` attribute).

        ``format`` is accepted but not consulted.
        """
        # i.e. not a file-like object.
        return not hasattr(uri, "write")