Add oscillator tutorial (#2862)

Summary: This commits add tutorial for oscillator_bank and adsr_envelope, which will be a basis for DDSP. - [Review here](https://output.circle-artifacts.com/output/job/cf1d3001-88e5-418b-8cf8-ae22b4445dba/artifacts/0/docs/tutorials/oscillator_tutorial.html) Pull Request resolved: https://github.com/pytorch/audio/pull/2862 Reviewed By: carolineechen Differential Revision: D41559503 Pulled By: mthrok fbshipit-source-id: 3f1689186db7d246de14f228fc2f91bf37db98cd

Add oscillator tutorial (#2862)
Summary: This commits add tutorial for oscillator_bank and adsr_envelope, which will be a basis for DDSP. - [Review here](https://output.circle-artifacts.com/output/job/cf1d3001-88e5-418b-8cf8-ae22b4445dba/artifacts/0/docs/tutorials/oscillator_tutorial.html) Pull Request resolved: https://github.com/pytorch/audio/pull/2862 Reviewed By: carolineechen Differential Revision: D41559503 Pulled By: mthrok fbshipit-source-id: 3f1689186db7d246de14f228fc2f91bf37db98cd
52e89756 · moto · Facebook GitHub Bot · 3882c395 · 52e89756 · 52e89756
Commit 52e89756 authored Nov 28, 2022 by moto Committed by Facebook GitHub Bot Nov 28, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 351 additions and 0 deletions

docs/source/index.rst docs/source/index.rst +8 -0

examples/tutorials/oscillator_tutorial.py examples/tutorials/oscillator_tutorial.py +343 -0

No files found.
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -38,6 +38,7 @@ model implementations and application components.
   tutorials/audio_data_augmentation_tutorial
   tutorials/audio_feature_extractions_tutorial
   tutorials/audio_feature_augmentation_tutorial
+   tutorials/oscillator_tutorial
   tutorials/audio_datasets_tutorial
@@ -183,6 +184,13 @@ Tutorials
   :link: tutorials/audio_feature_augmentation_tutorial.html
   :tags: Preprocessing
+.. customcarditem::
+   :header: Generating waveforms with oscillator
+   :card_description:
+   :image: _images/sphx_glr_oscillator_tutorial_003.png
+   :link: tutorials/oscillator_tutorial.html
+   :tags: DSP
 .. customcarditem::
   :header: Audio dataset
   :card_description: Learn how to use <code>torchaudio.datasets</code> module.

--- a/examples/tutorials/oscillator_tutorial.py
+++ b/examples/tutorials/oscillator_tutorial.py
+# -*- coding: utf-8 -*-
+"""
+Oscillator and ADSR envelope
+============================
+**Author**: `Moto Hira <moto@meta.com>`__
+This tutorial shows how to synthesize various waveforms using
+:py:func:`~torchaudio.prototype.functional.oscillator_bank` and
+:py:func:`~torchaudio.prototype.functional.adsr_envelope`.
+.. warning::
+   This tutorial requires prototype DSP features, which are
+   available in nightly builds.
+   Please refer to https://pytorch.org/get-started/locally
+   for instructions for installing a nightly build.
+"""
+import torch
+import torchaudio
+print(torch.__version__)
+print(torchaudio.__version__)
+######################################################################
+#
+try:
+    from torchaudio.prototype.functional import (
+        oscillator_bank,
+        adsr_envelope,
+    )
+except ModuleNotFoundError:
+    print(
+        "Failed to import prototype DSP features. "
+        "Please install torchaudio nightly builds. "
+        "Please refer to https://pytorch.org/get-started/locally "
+        "for instructions to install a nightly build.")
+    raise
+import math
+import matplotlib.pyplot as plt
+from IPython.display import Audio
+PI = torch.pi
+PI2 = 2 * torch.pi
+######################################################################
+# Oscillator Bank
+# ---------------
+#
+# Sinusoidal oscillator generates sinusoidal waveforms from given
+# amplitudes and frequencies.
+#
+# .. math::
+#
+#    x_t = A_t \sin \theta_t
+#
+# Where the phase :math:`\theta_t` is found by integrating the instantaneous
+# frequency :math:`f_t`.
+#
+# .. math::
+#
+#    \theta_t = \sum_{k=1}^{t} f_k
+#
+# .. note::
+#
+#    Why integrate the frequencies? Instantaneous frequency represents the velocity
+#    of oscillation at given time. So integrating the instantaneous frequency gives
+#    the displacement of the phase of the oscillation, since the start.
+#    In discrete-time signal processing, integration becomes accumulation.
+#    In PyTorch, accumulation can be computed using :py:func:`torch.cumsum`.
+#
+# :py:func:`torchaudio.prototype.functional.oscillator_bank` generates a bank of
+# sinsuoidal waveforms from amplitude envelopes and instantaneous frequencies.
+#
+######################################################################
+# Simple Sine Wave
+# ~~~~~~~~~~~~~~~~
+#
+# Let's start with simple case.
+#
+# First, we generate sinusoidal wave that has constant frequency and
+# amplitude everywhere, that is, a regular sine wave.
+#
+######################################################################
+#
+# We define some constants and helper function that we use for
+# the rest of the tutorial.
+#
+F0 = 344.  # fundamental frequency
+DURATION = 1.1  # [seconds]
+SAMPLE_RATE = 16_000  # [Hz]
+NUM_FRAMES = int(DURATION * SAMPLE_RATE)
+######################################################################
+#
+def show(freq, amp, waveform, sample_rate, zoom=None, vol=0.3):
+    t = torch.arange(waveform.size(0)) / sample_rate
+    fig, axes = plt.subplots(4, 1, sharex=True)
+    axes[0].plot(t, freq)
+    axes[0].set(
+        title=f"Oscillator bank (bank size: {amp.size(-1)})",
+        ylabel="Frequency [Hz]",
+        ylim=[-0.03, None])
+    axes[1].plot(t, amp)
+    axes[1].set(
+        ylabel="Amplitude",
+        ylim=[-0.03 if torch.all(amp >= 0.0) else None, None])
+    axes[2].plot(t, waveform)
+    axes[2].set(ylabel="Waveform")
+    axes[3].specgram(waveform, Fs=sample_rate)
+    axes[3].set(
+        ylabel="Spectrogram",
+        xlabel="Time [s]",
+        xlim=[-0.01, t[-1] + 0.01])
+    for i in range(4):
+        axes[i].grid(True)
+    pos = axes[2].get_position()
+    plt.tight_layout()
+    if zoom is not None:
+        ax = fig.add_axes([pos.x0 + 0.01, pos.y0 + 0.03, pos.width / 2.5, pos.height / 2.0])
+        ax.plot(t, waveform)
+        ax.set(xlim=zoom, xticks=[], yticks=[])
+    waveform /= waveform.abs().max()
+    return Audio(vol * waveform, rate=sample_rate, normalize=False)
+######################################################################
+#
+# Now we synthesize the audio with constant frequency and amplitude
+#
+freq = torch.full((NUM_FRAMES, 1), F0)
+amp = torch.ones((NUM_FRAMES, 1))
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, waveform, SAMPLE_RATE, zoom=(1/F0, 3/F0))
+######################################################################
+# Combining multiple sine waves
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# :py:func:`~torchaudio.prototype.functional.oscillator_bank` can
+# combine an arbitrary number of sinusoids to generate a waveform.
+#
+freq = torch.empty((NUM_FRAMES, 3))
+freq[:, 0] = F0
+freq[:, 1] = 3 * F0
+freq[:, 2] = 5 * F0
+amp = torch.ones((NUM_FRAMES, 3)) / 3
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, waveform, SAMPLE_RATE, zoom=(1/F0, 3/F0))
+######################################################################
+# Changing Frequencies across time
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Let's change the frequency over time. Here, we change the frequency
+# from zero to the Nyquist frequency (half of the sample rate) in
+# log-scale so that it is easy to see the change in waveform.
+#
+nyquist_freq = SAMPLE_RATE / 2
+freq = torch.logspace(0, math.log(0.99 * nyquist_freq, 10), NUM_FRAMES).unsqueeze(-1)
+amp = torch.ones((NUM_FRAMES, 1))
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, waveform, SAMPLE_RATE, vol=0.2)
+######################################################################
+#
+# We can also oscillate frequency.
+#
+fm = 2.5  # rate at which the frequency oscillates
+f_dev = 0.9 * F0  # the degree of frequency oscillation
+freq = F0 + f_dev * torch.sin(torch.linspace(0, fm * PI2 * DURATION, NUM_FRAMES))
+freq = freq.unsqueeze(-1)
+amp = torch.ones((NUM_FRAMES, 1))
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, waveform, SAMPLE_RATE)
+######################################################################
+# ADSR Envelope
+# -------------
+#
+######################################################################
+#
+# Next, we change the amplitude over time. A common technique to model
+# amplitude is ADSR Envelope.
+#
+# ADSR stands for Attack, Decay, Sustain, and Release.
+#
+#  - `Attack` is the time it takes to reach from zero to the top level.
+#  - `Decay` is the time it takes from the top to reach sustain level.
+#  - `Sustain` is the level at which the level stays constant.
+#  - `Release` is the time it takes to drop to zero from sustain level.
+#
+# There are many variants of ADSR model, additionally, some models have
+# the following properties
+#
+#  - `Hold`: The time the level stays at the top level after attack.
+#  - non-linear decay/release: The decay and release take non-linear change.
+#
+# :py:class:`~torchaudio.prototype.functional.adsr_envelope` supports
+# hold and polynomial decay.
+#
+freq = torch.full((SAMPLE_RATE, 1), F0)
+amp = adsr_envelope(
+    SAMPLE_RATE,
+    attack=0.2,
+    hold=0.2,
+    decay=0.2,
+    sustain=0.5,
+    release=0.2,
+    n_decay=1,
+)
+amp = amp.unsqueeze(-1)
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+audio = show(freq, amp, waveform, SAMPLE_RATE)
+ax = plt.gcf().axes[1]
+ax.annotate("Attack", xy=(0.05, 0.7))
+ax.annotate("Hold", xy=(0.28, 0.65))
+ax.annotate("Decay", xy=(0.45, 0.5))
+ax.annotate("Sustain", xy=(0.65, 0.3))
+ax.annotate("Release", xy=(0.88, 0.35))
+audio
+######################################################################
+#
+# Now let's look into some examples of how ADSR envelope can be used
+# to create different sounds.
+#
+# The following examples are inspired by
+# `this article <https://www.edmprod.com/adsr-envelopes/>`__.
+#
+######################################################################
+# Drum Beats
+# ~~~~~~~~~~
+#
+unit = NUM_FRAMES // 3
+repeat = 9
+freq = torch.empty((unit * repeat, 2))
+freq[:, 0] = F0 / 9
+freq[:, 1] = F0 / 5
+amp = torch.stack(
+    (
+        adsr_envelope(unit, attack=0.01, hold=0.125, decay=0.12, sustain=0.05, release=0),
+        adsr_envelope(unit, attack=0.01, hold=0.25, decay=0.08, sustain=0, release=0),
+    ),
+    dim=-1)
+amp = amp.repeat(repeat, 1) / 2
+bass = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, bass, SAMPLE_RATE, vol=0.5)
+######################################################################
+# Pluck
+# ~~~~~
+#
+tones = [
+    513.74,  # do
+    576.65,  # re
+    647.27,  # mi
+    685.76,  # fa
+    769.74,  # so
+    685.76,  # fa
+    647.27,  # mi
+    576.65,  # re
+    513.74,  # do
+]
+freq = torch.cat([torch.full((unit, 1), tone) for tone in tones], dim=0)
+amp = adsr_envelope(unit, attack=0, decay=0.7, sustain=0.28, release=0.29)
+amp = amp.repeat(9).unsqueeze(-1)
+doremi = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, doremi, SAMPLE_RATE)
+######################################################################
+# Riser
+# ~~~~~
+#
+env = adsr_envelope(NUM_FRAMES * 6, attack=0.98, decay=0., sustain=1, release=0.02)
+tones = [
+    484.90,  # B4
+    513.74,  # C5
+    576.65,  # D5
+    1221.88,  # D#6/Eb6
+    3661.50,  # A#7/Bb7
+    6157.89,  # G8
+]
+freq = torch.stack([f * env for f in tones], dim=-1)
+amp = env.unsqueeze(-1).expand(freq.shape) / len(tones)
+waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)
+show(freq, amp, waveform, SAMPLE_RATE)
+######################################################################
+# References
+# ----------
+#
+# - https://www.edmprod.com/adsr-envelopes/
+# - https://pages.mtu.edu/~suits/notefreq432.html
+# - https://alijamieson.co.uk/2021/12/19/forgive-me-lord-for-i-have-synth-a-guide-to-subtractive-synthesis/