Add adsr_envelope (#2859)

Summary:
Add the `adsr_envelope` op, which generates an ADSR (attack, decay, sustain, release) envelope.

* Supports generation of the envelope on GPU
* Supports an optional hold phase
* Supports polynomial decay

<img src='https://download.pytorch.org/torchaudio/doc-assets/adsr_examples.png'>

Pull Request resolved: https://github.com/pytorch/audio/pull/2859

Reviewed By: nateanl

Differential Revision: D41379601

Pulled By: mthrok

fbshipit-source-id: 3717a6e0360d2a24913c2a836c57c5edec1d7b31

Add adsr_envelope (#2859)
Summary:
Add the `adsr_envelope` op, which generates an ADSR (attack, decay, sustain, release) envelope.

* Supports generation of the envelope on GPU
* Supports an optional hold phase
* Supports polynomial decay

<img src='https://download.pytorch.org/torchaudio/doc-assets/adsr_examples.png'>

Pull Request resolved: https://github.com/pytorch/audio/pull/2859

Reviewed By: nateanl

Differential Revision: D41379601

Pulled By: mthrok

fbshipit-source-id: 3717a6e0360d2a24913c2a836c57c5edec1d7b31
793ff00b · moto · Facebook GitHub Bot · d912dcd7 · 793ff00b · 793ff00b
Commit 793ff00b authored Nov 17, 2022 by moto Committed by Facebook GitHub Bot Nov 17, 2022
4 changed files
--- a/docs/source/prototype.functional.rst
+++ b/docs/source/prototype.functional.rst
@@ -31,4 +31,5 @@ DSP
   :toctree: generated
   :nosignatures:
+   adsr_envelope
   oscillator_bank
--- a/test/torchaudio_unittest/prototype/functional/functional_test_impl.py
+++ b/test/torchaudio_unittest/prototype/functional/functional_test_impl.py
 import numpy as np
 import torch
 import torchaudio.prototype.functional as F
-from parameterized import parameterized
+from parameterized import param, parameterized
 from scipy import signal
 from torchaudio_unittest.common_utils import nested_params, TestBaseMixin
@@ -149,6 +149,147 @@ class FunctionalTestImpl(TestBaseMixin):
        with self.assertWarnsRegex(UserWarning, r"above nyquist frequency"):
            F.oscillator_bank(-nyquist * freqs, amps, sample_rate)
+    # Every case requests 11 frames, i.e. 10 intervals between the first and the last frame,
+    # so a phase ratio of 0.5 covers 5 intervals and adjacent phases share their boundary frame.
+    @parameterized.expand(
+        [
+            # Attack (full)
+            param(
+                num_frames=11,
+                expected=[i / 10 for i in range(11)],
+                attack=1.0,
+            ),
+            # Attack (partial)
+            param(
+                num_frames=11,
+                expected=[0, 0.2, 0.4, 0.6, 0.8, 1.0, 0, 0, 0, 0, 0],
+                attack=0.5,
+            ),
+            # Hold (partial with attack)
+            param(
+                num_frames=11,
+                expected=[0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+                attack=0.5,
+                hold=0.5,
+            ),
+            # Hold (partial without attack)
+            param(
+                num_frames=11,
+                expected=[1.0] * 6 + [0.0] * 5,
+                hold=0.5,
+            ),
+            # Hold (full)
+            param(
+                num_frames=11,
+                expected=[1.0] * 11,
+                hold=1.0,
+            ),
+            # Decay (partial - linear, preceded by attack)
+            param(
+                num_frames=11,
+                expected=[0, 0.2, 0.4, 0.6, 0.8, 1.0, 0.8, 0.6, 0.4, 0.2, 0],
+                attack=0.5,
+                decay=0.5,
+                n_decay=1,
+            ),
+            # Decay (partial - linear, preceded by hold)
+            param(
+                num_frames=11,
+                expected=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8, 0.6, 0.4, 0.2, 0],
+                hold=0.5,
+                decay=0.5,
+                n_decay=1,
+            ),
+            # Decay (partial - linear)
+            param(
+                num_frames=11,
+                expected=[1.0, 0.8, 0.6, 0.4, 0.2, 0, 0, 0, 0, 0, 0],
+                decay=0.5,
+                n_decay=1,
+            ),
+            # Decay (partial - polynomial)
+            param(
+                num_frames=11,
+                expected=[1.0, 0.64, 0.36, 0.16, 0.04, 0, 0, 0, 0, 0, 0],
+                decay=0.5,
+                n_decay=2,
+            ),
+            # Decay (full - linear)
+            param(
+                num_frames=11,
+                expected=[1.0 - i / 10 for i in range(11)],
+                decay=1.0,
+                n_decay=1,
+            ),
+            # Decay (full - polynomial)
+            param(
+                num_frames=11,
+                expected=[(1.0 - i / 10) ** 2 for i in range(11)],
+                decay=1.0,
+                n_decay=2,
+            ),
+            # Sustain (partial - preceded by decay)
+            param(
+                num_frames=11,
+                expected=[1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
+                decay=0.5,
+                sustain=0.5,
+                n_decay=1,
+            ),
+            # Sustain (partial - preceded by a shorter decay of 3 intervals)
+            param(
+                num_frames=11,
+                expected=[1.0, 0.8, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
+                decay=0.3,
+                sustain=0.4,
+                n_decay=1,
+            ),
+            # Sustain (full)
+            param(
+                num_frames=11,
+                expected=[0.3] * 11,
+                sustain=0.3,
+            ),
+            # Release (partial - preceded by decay)
+            param(
+                num_frames=11,
+                expected=[1.0, 0.84, 0.68, 0.52, 0.36, 0.2, 0.16, 0.12, 0.08, 0.04, 0.0],
+                decay=0.5,
+                sustain=0.2,
+                release=0.5,
+                n_decay=1,
+            ),
+            # Release (partial - preceded by sustain)
+            param(
+                num_frames=11,
+                expected=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0],
+                sustain=0.5,
+                release=0.5,
+            ),
+            # Release (full)
+            param(
+                num_frames=11,
+                expected=[1 - i / 10 for i in range(11)],
+                sustain=1.0,
+                release=1.0,
+            ),
+        ]
+    )
+    # NOTE: the keyword defaults below belong to this test, not to the op. In particular
+    # `sustain` defaults to 0.0 here, so frames outside the phases a case specifies are expected to be zero.
+    def test_adsr_envelope(
+        self, num_frames, expected, attack=0.0, hold=0.0, decay=0.0, sustain=0.0, release=0.0, n_decay=2.0
+    ):
+        """Each ADSR phase spans the expected number of frames and takes the expected values."""
+        out = F.adsr_envelope(
+            num_frames,
+            attack=attack,
+            hold=hold,
+            decay=decay,
+            sustain=sustain,
+            release=release,
+            n_decay=n_decay,
+            device=self.device,
+            dtype=self.dtype,
+        )
+        self.assertEqual(out, torch.tensor(expected, device=self.device, dtype=self.dtype))
 class Functional64OnlyTestImpl(TestBaseMixin):
    @nested_params(

--- a/torchaudio/prototype/functional/__init__.py
+++ b/torchaudio/prototype/functional/__init__.py
-from ._dsp import oscillator_bank
+from ._dsp import adsr_envelope, oscillator_bank
 from .functional import add_noise, barkscale_fbanks, convolve, fftconvolve
 __all__ = [
    "add_noise",
+    "adsr_envelope",
    "barkscale_fbanks",
    "convolve",
    "fftconvolve",

--- a/torchaudio/prototype/functional/_dsp.py
+++ b/torchaudio/prototype/functional/_dsp.py
 import warnings
+from typing import Optional
 import torch
@@ -78,3 +79,104 @@ def oscillator_bank(
    if reduction == "mean":
        return waveform.mean(-1)
    return waveform
+def adsr_envelope(
+    num_frames: int,
+    *,
+    attack: float = 0.0,
+    hold: float = 0.0,
+    decay: float = 0.0,
+    sustain: float = 1.0,
+    release: float = 0.0,
+    n_decay: int = 2,
+    dtype: Optional[torch.dtype] = None,
+    device: Optional[torch.device] = None,
+) -> torch.Tensor:
+    """Generate ADSR Envelope
+
+    .. devices:: CPU CUDA
+
+    The envelope is assembled from consecutive phases: a linear attack from 0 to 1,
+    an optional hold at 1, a polynomial decay from 1 down to the sustain level,
+    the sustain itself, and a linear release from the sustain level to 0.
+    Each phase duration is truncated to a whole number of frame intervals,
+    ``int((num_frames - 1) * ratio)``, and adjacent phases share their boundary frame,
+    where the later phase's value wins.
+
+    Args:
+        num_frames (int): The number of output frames. Must be non-negative.
+
+        attack (float, optional):
+            The relative *time* it takes to reach the maximum level from
+            the start. (Default: ``0.0``)
+
+        hold (float, optional):
+            The relative *time* the maximum level is held before
+            it starts to decay. (Default: ``0.0``)
+
+        decay (float, optional):
+            The relative *time* it takes to fall from the maximum level
+            to the sustain level. (Default: ``0.0``)
+
+        sustain (float, optional): The relative *level* at which
+            the sound should sustain. (Default: ``1.0``)
+
+            .. Note::
+               The duration of sustain is derived as `1.0 - (The sum of attack, hold, decay and release)`.
+
+        release (float, optional): The relative *time* it takes for the sound level to
+            reach zero after the sustain. (Default: ``0.0``)
+
+        n_decay (int, optional): The degree of polynomial decay. Default: ``2``.
+
+        dtype (torch.dtype, optional): the desired data type of returned tensor.
+            Default: if ``None``, uses a global default
+            (see :py:func:`torch.set_default_tensor_type`).
+
+        device (torch.device, optional): the desired device of returned tensor.
+            Default: if ``None``, uses the current device for the default tensor type
+            (see :py:func:`torch.set_default_tensor_type`).
+            device will be the CPU for CPU tensor types and the current CUDA
+            device for CUDA tensor types.
+
+    Returns:
+        Tensor: ADSR Envelope. Shape: `(num_frames, )`
+
+    Raises:
+        ValueError: If ``num_frames`` is negative, if any of ``attack``, ``decay``,
+            ``sustain``, ``hold`` or ``release`` is outside ``[0, 1]``, or if the sum of
+            ``attack``, ``hold``, ``decay`` and ``release`` exceeds 1.
+
+    Example
+        .. image:: https://download.pytorch.org/torchaudio/doc-assets/adsr_examples.png
+    """
+    # Reject a negative length here, consistently with the other argument checks, instead of
+    # letting it surface later as an opaque error from the tensor allocation.
+    if num_frames < 0:
+        raise ValueError(f"The value of `num_frames` must be non-negative. Found: {num_frames}")
+
+    # Checked in this order so that the first offending argument reported is stable.
+    for name, value in (
+        ("attack", attack),
+        ("decay", decay),
+        ("sustain", sustain),
+        ("hold", hold),
+        ("release", release),
+    ):
+        if not 0 <= value <= 1:
+            raise ValueError(f"The value of `{name}` must be within [0, 1]. Found: {value}")
+    if attack + decay + release + hold > 1:
+        raise ValueError("The sum of `attack`, `hold`, `decay` and `release` must not exceed 1.")
+
+    # Durations are measured in intervals between frames, hence `num_frames - 1`;
+    # a phase of `k` intervals therefore touches `k + 1` frames.
+    nframes = num_frames - 1
+    num_a = int(nframes * attack)
+    num_h = int(nframes * hold)
+    num_d = int(nframes * decay)
+    num_r = int(nframes * release)
+
+    # Initialize with sustain; the phases below overwrite their own frames in place.
+    out = torch.full((num_frames,), float(sustain), device=device, dtype=dtype)
+
+    # attack: linear ramp 0 -> 1
+    if num_a > 0:
+        torch.linspace(0.0, 1.0, num_a + 1, out=out[: num_a + 1])
+
+    # hold: stay at the maximum level, starting on the last attack frame
+    if num_h > 0:
+        out[num_a : num_a + num_h + 1] = 1.0
+
+    # decay
+    if num_d > 0:
+        # Compute: sustain + (1.0 - sustain) * (linspace[1, 0] ** n_decay)
+        # `decay_view` is a view into `out`, so the in-place ops write straight into the result.
+        start = num_a + num_h
+        decay_view = out[start : start + num_d + 1]
+        torch.linspace(1.0, 0.0, num_d + 1, out=decay_view)
+        decay_view **= n_decay
+        decay_view *= 1.0 - sustain
+        decay_view += sustain
+
+    # sustain is handled by initialization
+
+    # release: linear ramp sustain -> 0 over the trailing frames
+    if num_r > 0:
+        torch.linspace(sustain, 0, num_r + 1, out=out[-num_r - 1 :])
+
+    return out