sox_effects.py 8.87 KB
Newer Older
1
from typing import Any, Callable, List, Optional, Tuple, Union
David Pollack's avatar
David Pollack committed
2

3
4
import torch
from torch import Tensor
David Pollack's avatar
David Pollack committed
5

6
7
8
9
from torchaudio._internal import (
    module_utils as _mod_utils,
    misc_ops as _misc_ops,
)
Vincent QB's avatar
Vincent QB committed
10

11
if _mod_utils.is_module_available('torchaudio._torchaudio'):
12
    from torchaudio import _torchaudio
13
14


15
@_mod_utils.requires_module('torchaudio._torchaudio')
def init_sox_effects() -> None:
    """Set up the resources that ``SoxEffectsChain`` depends on.

    This function is invoked automatically, so there is no need to call it
    by hand.

    A single initialization covers any number of calls to
    ``SoxEffectsChain.sox_build_flow_effects``. Calling this function again is
    safe as long as ``shutdown_sox_effects`` has not been called yet.

    After ``shutdown_sox_effects`` has been called, SoX effects can no longer
    be used and initializing again will result in an error.

    Note:
        This function is not required for simple loading.
    """
    initialize = torch.ops.torchaudio.sox_effects_initialize_sox_effects
    initialize()
31
32
33


@_mod_utils.requires_module("torchaudio._torchaudio")
def shutdown_sox_effects() -> None:
    """Release the resources that ``SoxEffectsChain`` depends on.

    This function is invoked automatically, so there is no need to call it
    by hand. Calling it more than once is safe.

    After this function has been called, SoX effects can no longer be used
    and initializing again will result in an error.
    """
    shutdown = torch.ops.torchaudio.sox_effects_shutdown_sox_effects
    shutdown()
44
45


46
@_mod_utils.requires_module('torchaudio._torchaudio')
def effect_names() -> List[str]:
    """Get the list of valid sox effect names.

    Returns:
        List[str]: The effect names reported by the
        ``sox_effects_list_effects`` torch op.

    Example
        >>> EFFECT_NAMES = torchaudio.sox_effects.effect_names()
    """
    names = torch.ops.torchaudio.sox_effects_list_effects()
    return names
David Pollack's avatar
David Pollack committed
56
57


58
@_mod_utils.requires_module('torchaudio._torchaudio')
def SoxEffect():
    r"""Build a fresh container for passing one sox effect between python and c++.

    Returns:
        SoxEffect: An object with the following attributes: ``ename`` (str), the
        name of the effect, and ``eopts`` (List[str]), the list of effect options.
    """
    effect = _torchaudio.SoxEffect()
    return effect
David Pollack's avatar
David Pollack committed
67
68
69


class SoxEffectsChain(object):
    r"""SoX effects chain class.

    Collects a sequence of sox effects in python and applies them to an input
    file, producing a tensor, via ``sox_build_flow_effects``.

    Args:
        normalization (bool, number, or callable, optional): If boolean `True`, then output is divided by `1 << 31`
            (assumes signed 32-bit audio), and normalizes to `[-1, 1]`. If `number`, then output is divided by that
            number. If `callable`, then the output is passed as a parameter to the given function, then the
            output is divided by the result. (Default: ``True``)
        channels_first (bool, optional): Set channels first or length first in result.  (Default: ``True``)
        out_siginfo (sox_signalinfo_t, optional): a sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined. (Default: ``None``)
        out_encinfo (sox_encodinginfo_t, optional): a sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined. (Default: ``None``)
        filetype (str, optional): a filetype or extension to be set if sox cannot determine it
            automatically. (Default: ``'raw'``)

    Returns:
        Tuple[Tensor, int]: (from ``sox_build_flow_effects``) An output Tensor of size `[C x L]` or
        `[L x C]` where L is the number of audio frames and C is the number of channels. An integer
        which is the sample rate of the audio (as listed in the metadata of the file)

    Example
        >>> class MyDataset(Dataset):
        >>>     def __init__(self, audiodir_path):
        >>>         self.data = [os.path.join(audiodir_path, fn) for fn in os.listdir(audiodir_path)]
        >>>         self.E = torchaudio.sox_effects.SoxEffectsChain()
        >>>         self.E.append_effect_to_chain("rate", [16000])  # resample to 16000hz
        >>>         self.E.append_effect_to_chain("channels", ["1"])  # mono signal
        >>>     def __getitem__(self, index):
        >>>         fn = self.data[index]
        >>>         self.E.set_input_file(fn)
        >>>         x, sr = self.E.sox_build_flow_effects()
        >>>         return x, sr
        >>>
        >>>     def __len__(self):
        >>>         return len(self.data)
        >>>
        >>> torchaudio.initialize_sox()
        >>> ds = MyDataset(path_to_audio_files)
        >>> for sig, sr in ds:
        >>>   [do something here]
        >>> torchaudio.shutdown_sox()

    """

    # Effect names that are rejected with NotImplementedError by ``_check_effect``.
    EFFECTS_UNIMPLEMENTED = {"spectrogram", "splice", "noiseprof", "fir"}

    def __init__(self,
                 normalization: Union[bool, float, Callable] = True,
                 channels_first: bool = True,
                 out_siginfo: Any = None,
                 out_encinfo: Any = None,
                 filetype: str = "raw") -> None:
        self.input_file: Optional[str] = None
        # Holds the ``SoxEffect`` objects created by ``append_effect_to_chain``.
        self.chain: List[Any] = []
        # Upper bound on the number of options accepted for a single effect.
        self.MAX_EFFECT_OPTS = 20
        self.out_siginfo = out_siginfo
        self.out_encinfo = out_encinfo
        self.filetype = filetype
        self.normalization = normalization
        self.channels_first = channels_first

        # Define in __init__ to avoid calling at import time
        self.EFFECTS_AVAILABLE = set(effect_names())

    def append_effect_to_chain(self,
                               ename: str,
                               eargs: Optional[Union[List[str], str]] = None) -> None:
        r"""Append effect to a sox effects chain.

        Args:
            ename (str): which is the name of effect
            eargs (List[str] or str, optional): which is a list of effect options. Nested lists are
                flattened and every option is converted to ``str``. (Default: ``None``)

        Raises:
            NotImplementedError: If ``ename`` is listed in ``EFFECTS_UNIMPLEMENTED``.
            LookupError: If ``ename`` is not one of the available effect names.
            RuntimeError: If more than ``MAX_EFFECT_OPTS`` options are given.
        """
        # check if we have a valid effect (also normalizes the name to lower case)
        ename = self._check_effect(ename)

        # An effect without options is represented by a single empty option string.
        if eargs is None or eargs == []:
            eargs = [""]
        elif not isinstance(eargs, list):
            eargs = [eargs]
        eargs = self._flatten(eargs)
        if len(eargs) > self.MAX_EFFECT_OPTS:
            raise RuntimeError("Number of effect options ({}) is greater than max "
                               "suggested number of options {}.  Increase MAX_EFFECT_OPTS "
                               "or lower the number of effect options".format(len(eargs), self.MAX_EFFECT_OPTS))

        effect = SoxEffect()
        effect.ename = ename
        effect.eopts = eargs
        self.chain.append(effect)

    @_mod_utils.requires_module('torchaudio._torchaudio')
    def sox_build_flow_effects(self,
                               out: Optional[Tensor] = None) -> Tuple[Tensor, int]:
        r"""Build effects chain and flow effects from input file to output tensor

        Args:
            out (Tensor, optional): Where the output will be written to. (Default: ``None``)

        Returns:
            Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number
            of audio frames and C is the number of channels. An integer which is the sample rate of the
            audio (as listed in the metadata of the file)
        """
        # initialize output tensor
        if out is not None:
            _misc_ops.check_input(out)
        else:
            out = torch.FloatTensor()

        # An empty chain is passed down as a single "no_effects" placeholder.
        # The placeholder lives only in a local list: storing it in
        # ``self.chain`` would leave it in front of any effect appended later.
        chain = self.chain
        if not chain:
            placeholder = SoxEffect()
            placeholder.ename = "no_effects"
            placeholder.eopts = [""]
            chain = [placeholder]

        sr = _torchaudio.build_flow_effects(self.input_file,
                                            out,
                                            self.channels_first,
                                            self.out_siginfo,
                                            self.out_encinfo,
                                            self.filetype,
                                            chain,
                                            self.MAX_EFFECT_OPTS)

        # The return value is ignored here, as in the original code; ``out`` is
        # returned to the caller afterwards.
        _misc_ops.normalize_audio(out, self.normalization)

        return out, sr

    def clear_chain(self) -> None:
        r"""Clear effects chain in python
        """
        self.chain = []

    def set_input_file(self, input_file: str) -> None:
        r"""Set input file for input of chain

        Args:
            input_file (str): The path to the input file.
        """
        self.input_file = input_file

    def _check_effect(self, e: str) -> str:
        r"""Validate an effect name and return it in lower case.

        Args:
            e (str): The effect name; matching is case-insensitive.

        Returns:
            str: The lower-cased effect name.

        Raises:
            NotImplementedError: If the effect is listed in ``EFFECTS_UNIMPLEMENTED``.
            LookupError: If the effect is not in ``EFFECTS_AVAILABLE``.
        """
        name = e.lower()
        if name in self.EFFECTS_UNIMPLEMENTED:
            raise NotImplementedError("This effect ({}) is not implement in torchaudio".format(e))
        if name not in self.EFFECTS_AVAILABLE:
            raise LookupError("Effect name, {}, not valid".format(name))
        return name

    def _flatten(self, x: list) -> list:
        r"""Flatten an arbitrarily nested list into a flat list of strings.

        Args:
            x (list): A list whose items are either values or (nested) lists of values.

        Returns:
            list: Every non-list item of ``x``, in order, converted with ``str``.
        """
        flat: List[str] = []
        for item in x:
            if isinstance(item, list):
                # Recurse into the nested list itself, then carry on with the
                # remaining items of ``x``.
                flat.extend(self._flatten(item))
            else:
                flat.append(str(item))
        return flat