sox_effects.py 6.85 KB
Newer Older
1
from __future__ import absolute_import, division, print_function, unicode_literals
David Pollack's avatar
David Pollack committed
2
3
4
5
6
import torch
import _torch_sox

import torchaudio

David Pollack's avatar
David Pollack committed
7
8
9
10
11
12

def effect_names():
    """Return the list of valid sox effect names.

    The names are obtained from ``_torch_sox.get_effect_names()``.

    Returns:
        list[str]: The valid sox effect names.

    Example
        >>> EFFECT_NAMES = torchaudio.sox_effects.effect_names()
    """
    names = _torch_sox.get_effect_names()
    return names


def SoxEffect():
    r"""Create the object used to hand sox effect settings from python to c++.

    Returns:
        SoxEffect: An object with two attributes: ``ename`` (str), the name of the
        effect, and ``eopts`` (List[str]), the list of effect options.
    """
    effect = _torch_sox.SoxEffect()
    return effect


class SoxEffectsChain(object):
    r"""Chain of SoX effects applied to an input file, producing a tensor.

    Args:
        normalization (bool, number, or callable, optional): If boolean `True`, the output is divided
            by `1 << 31` (assumes signed 32-bit audio), which normalizes it to `[-1, 1]`. If `number`,
            the output is divided by that number. If `callable`, the output is passed as a parameter
            to the given function, then the output is divided by the result. (Default: ``True``)
        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
        out_siginfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if
            the audio type cannot be automatically determined. (Default: ``None``)
        out_encinfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if
            the audio type cannot be automatically determined. (Default: ``None``)
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. (Default: ``'raw'``)

    Returns:
        Tuple[torch.Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number
        of audio frames and C is the number of channels. An integer which is the sample rate of the
        audio (as listed in the metadata of the file)

    Example
        >>> class MyDataset(Dataset):
        >>>     def __init__(self, audiodir_path):
        >>>         self.data = [os.path.join(audiodir_path, fn) for fn in os.listdir(audiodir_path)]
        >>>         self.E = torchaudio.sox_effects.SoxEffectsChain()
        >>>         self.E.append_effect_to_chain("rate", [16000])  # resample to 16000hz
        >>>         self.E.append_effect_to_chain("channels", ["1"])  # mono signal
        >>>     def __getitem__(self, index):
        >>>         fn = self.data[index]
        >>>         self.E.set_input_file(fn)
        >>>         x, sr = self.E.sox_build_flow_effects()
        >>>         return x, sr
        >>>
        >>>     def __len__(self):
        >>>         return len(self.data)
        >>>
        >>> torchaudio.initialize_sox()
        >>> ds = MyDataset(path_to_audio_files)
        >>> for sig, sr in ds:
        >>>   [do something here]
        >>> torchaudio.shutdown_sox()

    """

    # Every effect name reported by effect_names(); _check_effect accepts only these.
    EFFECTS_AVAILABLE = set(effect_names())
    # Effects that _check_effect rejects with NotImplementedError.
    EFFECTS_UNIMPLEMENTED = {"spectrogram", "splice", "noiseprof", "fir"}
David Pollack's avatar
David Pollack committed
76
77
78
79
80
81
82
83
84
85
86

    def __init__(self, normalization=True, channels_first=True, out_siginfo=None, out_encinfo=None, filetype="raw"):
        self.input_file = None
        self.chain = []
        self.MAX_EFFECT_OPTS = 20
        self.out_siginfo = out_siginfo
        self.out_encinfo = out_encinfo
        self.filetype = filetype
        self.normalization = normalization
        self.channels_first = channels_first

David Pollack's avatar
David Pollack committed
87
    def append_effect_to_chain(self, ename, eargs=None):
        r"""Validate an effect and add it to the end of the sox effects chain.

        Args:
            ename (str): Name of the effect. It is checked and lower-cased by ``_check_effect``.
            eargs (List[str]): List of effect options. ``None`` or an empty list becomes a single
                empty option, a non-list value is wrapped in a list, and the result is passed
                through ``_flatten``. (Default: ``None``)

        Raises:
            NotImplementedError: If ``_check_effect`` finds the effect in ``EFFECTS_UNIMPLEMENTED``.
            LookupError: If ``_check_effect`` does not find the effect in ``EFFECTS_AVAILABLE``.
            RuntimeError: If there are more than ``MAX_EFFECT_OPTS`` options.
        """
        effect = SoxEffect()
        # check if we have a valid effect
        checked_name = self._check_effect(ename)

        # Bring the options into list form before flattening them.
        if eargs is None or eargs == []:
            options = [""]
        elif isinstance(eargs, list):
            options = eargs
        else:
            options = [eargs]
        options = self._flatten(options)

        option_count = len(options)
        if option_count > self.MAX_EFFECT_OPTS:
            raise RuntimeError("Number of effect options ({}) is greater than max "
                               "suggested number of options {}.  Increase MAX_EFFECT_OPTS "
                               "or lower the number of effect options".format(option_count, self.MAX_EFFECT_OPTS))

        effect.ename = checked_name
        effect.eopts = options
        self.chain.append(effect)

    def sox_build_flow_effects(self, out=None):
        r"""Build effects chain and flow effects from input file to output tensor

        Args:
            out (torch.Tensor): Where the output will be written to. When given it is first
                passed to ``torchaudio.check_input``; otherwise a new ``torch.FloatTensor`` is
                created. (Default: ``None``)

        Returns:
            Tuple[torch.Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number
            of audio frames and C is the number of channels. An integer which is the sample rate of the
            audio (as listed in the metadata of the file)
        """
        # initialize output tensor
        if out is None:
            out = torch.FloatTensor()
        else:
            torchaudio.check_input(out)

        chain = self.chain
        if not chain:
            # An empty chain is sent to the backend as a single "no_effects" placeholder.
            # The placeholder stays local so that self.chain still holds only what the
            # caller appended, and effects added afterwards are not queued behind it.
            placeholder = SoxEffect()
            placeholder.ename = "no_effects"
            placeholder.eopts = [""]
            chain = [placeholder]

        sr = _torch_sox.build_flow_effects(self.input_file,
                                           out,
                                           self.channels_first,
                                           self.out_siginfo,
                                           self.out_encinfo,
                                           self.filetype,
                                           chain,
                                           self.MAX_EFFECT_OPTS)

        # The return value is not used here, so this is expected to modify `out` in place.
        torchaudio._audio_normalization(out, self.normalization)

        return out, sr

    def clear_chain(self):
        r"""Reset the python-side effects chain to a new empty list.

        The previous list object is replaced, not emptied in place.
        """
        self.chain = list()

    def set_input_file(self, input_file):
        r"""Select the file that the chain takes as its input.

        The value is stored as given, without any check.

        Args:
            input_file (str): The path to the input file.
        """
        self.input_file = input_file

David Pollack's avatar
David Pollack committed
159
160
161
162
163
164
165
    def _check_effect(self, e):
        if e.lower() in self.EFFECTS_UNIMPLEMENTED:
            raise NotImplementedError("This effect ({}) is not implement in torchaudio".format(e))
        elif e.lower() not in self.EFFECTS_AVAILABLE:
            raise LookupError("Effect name, {}, not valid".format(e.lower()))
        return e.lower()

David Pollack's avatar
David Pollack committed
166
167
168
169
170
171
172
173
    # https://stackoverflow.com/questions/12472338/flattening-a-list-recursively
    # convenience function to flatten list recursively
    def _flatten(self, x):
        if x == []:
            return []
        if isinstance(x[0], list):
            return self._flatten(x[:1]) + self._flatten(x[:1])
        return [str(a) for a in x[:1]] + self._flatten(x[1:])