Update resampling tutorial (#2773)

Summary: * Refactor benchmark script * Rename `time` variable to avoid (potential) conflicting with time module * Fix `beta` parameter in benchmark (it was not used previously) * Use `timeit` module for benchmark * Add plot * Move the comment on result at the end * Add link to an explanation of aliasing https://output.circle-artifacts.com/output/job/20b57d2f-3614-4161-a18e-e0c1a537739c/artifacts/0/docs/tutorials/audio_resampling_tutorial.html Pull Request resolved: https://github.com/pytorch/audio/pull/2773 Reviewed By: carolineechen Differential Revision: D40421337 Pulled By: mthrok fbshipit-source-id: b402f84d4517695daeca75fb84ad876ef9354b3a

Update resampling tutorial (#2773)
Summary: * Refactor benchmark script * Rename `time` variable to avoid (potential) conflicting with time module * Fix `beta` parameter in benchmark (it was not used previously) * Use `timeit` module for benchmark * Add plot * Move the comment on result at the end * Add link to an explanation of aliasing https://output.circle-artifacts.com/output/job/20b57d2f-3614-4161-a18e-e0c1a537739c/artifacts/0/docs/tutorials/audio_resampling_tutorial.html Pull Request resolved: https://github.com/pytorch/audio/pull/2773 Reviewed By: carolineechen Differential Revision: D40421337 Pulled By: mthrok fbshipit-source-id: b402f84d4517695daeca75fb84ad876ef9354b3a
8f187354 · moto · Facebook GitHub Bot · 5239583e · 8f187354
Commit 8f187354 authored Oct 17, 2022 by moto Committed by Facebook GitHub Bot Oct 17, 2022
Show whitespace changes
Inline Side-by-side

Showing with 184 additions and 123 deletions

examples/tutorials/audio_resampling_tutorial.py examples/tutorials/audio_resampling_tutorial.py +184 -123

No files found.
--- a/examples/tutorials/audio_resampling_tutorial.py
+++ b/examples/tutorials/audio_resampling_tutorial.py
@@ -22,19 +22,14 @@ print(torchaudio.__version__)
 #
 # First, we import the modules and define the helper functions.
 #
-# .. note::
-#    When running this tutorial in Google Colab, install the required packages
-#    with the following.
-#
-#    .. code::
-#
-#       !pip install librosa
 import math
-import time
+import timeit
 import librosa
+import resampy
 import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
 import pandas as pd
 from IPython.display import Audio, display
@@ -63,18 +58,18 @@ def _get_inverse_log_freq(freq, sample_rate, offset):
 def _get_freq_ticks(sample_rate, offset, f_max):
    # Given the original sample rate used for generating the sweep,
    # find the x-axis value where the log-scale major frequency values fall in
-    time, freq = [], []
+    times, freq = [], []
    for exp in range(2, 5):
        for v in range(1, 10):
            f = v * 10**exp
            if f < sample_rate // 2:
                t = _get_inverse_log_freq(f, sample_rate, offset) / sample_rate
-                time.append(t)
+                times.append(t)
                freq.append(f)
    t_max = _get_inverse_log_freq(f_max, sample_rate, offset) / sample_rate
-    time.append(t_max)
+    times.append(t_max)
    freq.append(f_max)
-    return time, freq
+    return times, freq
 def get_sine_sweep(sample_rate, offset=DEFAULT_OFFSET):
@@ -118,7 +113,7 @@ def plot_sweep(
 # -------------------
 #
 # To resample an audio waveform from one freqeuncy to another, you can use
-# :py:func:`torchaudio.transforms.Resample` or
+# :py:class:`torchaudio.transforms.Resample` or
 # :py:func:`torchaudio.functional.resample`.
 # ``transforms.Resample`` precomputes and caches the kernel used for resampling,
 # while ``functional.resample`` computes it on the fly, so using
@@ -163,6 +158,9 @@ Audio(waveform.numpy()[0], rate=sample_rate)
 #
 # We see that in the spectrogram of the resampled waveform, there is an
 # artifact, which was not present in the original waveform.
+# This effect is called aliasing.
+# `This page <https://music.arts.uci.edu/dobrian/digitalaudio.htm>`__ has
+# an explanation of how it happens, and why it looks like a reflection.
 resample_rate = 32000
 resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype)
@@ -332,20 +330,15 @@ print("torchaudio and librosa kaiser fast MSE:", mse)
 # ``kaiser_best`` and ``kaiser_fast`` using their corresponding parameters
 # in ``torchaudio``.
 #
-# To elaborate on the results:
-#
-# - a larger ``lowpass_filter_width`` results in a larger resampling kernel,
-#   and therefore increases computation time for both the kernel computation
-#   and convolution
-# - using ``kaiser_window`` results in longer computation times than the default
-#   ``sinc_interpolation`` because it is more complex to compute the intermediate
-#   window values - a large GCD between the sample and resample rate will result
-#   in a simplification that allows for a smaller kernel and faster kernel computation.
-#
+print(f"torchaudio: {torchaudio.__version__}")
+print(f"librosa: {librosa.__version__}")
+print(f"resampy: {resampy.__version__}")
-def benchmark_resample(
+######################################################################
-    method,
+#
+def benchmark_resample_functional(
    waveform,
    sample_rate,
    resample_rate,
@@ -353,126 +346,194 @@ def benchmark_resample(
    rolloff=0.99,
    resampling_method="sinc_interpolation",
    beta=None,
-    librosa_type=None,
    iters=5,
 ):
-    if method == "functional":
+    return timeit.timeit(
-        begin = time.monotonic()
+        stmt='''
-        for _ in range(iters):
+torchaudio.functional.resample(
-            F.resample(
    waveform,
    sample_rate,
    resample_rate,
    lowpass_filter_width=lowpass_filter_width,
    rolloff=rolloff,
    resampling_method=resampling_method,
-            )
+    beta=beta,
-        elapsed = time.monotonic() - begin
+)
-        return elapsed / iters
+        ''',
-    elif method == "transforms":
+        setup='import torchaudio',
-        resampler = T.Resample(
+        number=iters,
+        globals=locals(),
+    ) * 1000 / iters
+######################################################################
+#
+def benchmark_resample_transforms(
+    waveform,
+    sample_rate,
+    resample_rate,
+    lowpass_filter_width=6,
+    rolloff=0.99,
+    resampling_method="sinc_interpolation",
+    beta=None,
+    iters=5,
+):
+    return timeit.timeit(
+        stmt='resampler(waveform)',
+        setup='''
+import torchaudio
+resampler = torchaudio.transforms.Resample(
    sample_rate,
    resample_rate,
    lowpass_filter_width=lowpass_filter_width,
    rolloff=rolloff,
    resampling_method=resampling_method,
    dtype=waveform.dtype,
-        )
+    beta=beta,
-        begin = time.monotonic()
+)
-        for _ in range(iters):
+resampler.to(waveform.device)
-            resampler(waveform)
+        ''',
-        elapsed = time.monotonic() - begin
+        number=iters,
-        return elapsed / iters
+        globals=locals(),
-    elif method == "librosa":
+    ) * 1000 / iters
-        waveform_np = waveform.squeeze().numpy()
-        begin = time.monotonic()
-        for _ in range(iters):
-            librosa.resample(waveform_np, orig_sr=sample_rate, target_sr=resample_rate, res_type=librosa_type)
-        elapsed = time.monotonic() - begin
-        return elapsed / iters
 ######################################################################
 #
-configs = {
+def benchmark_resample_librosa(
-    "downsample (48 -> 44.1 kHz)": [48000, 44100],
+    waveform,
-    "downsample (16 -> 8 kHz)": [16000, 8000],
+    sample_rate,
-    "upsample (44.1 -> 48 kHz)": [44100, 48000],
+    resample_rate,
-    "upsample (8 -> 16 kHz)": [8000, 16000],
+    res_type=None,
-}
+    iters=5,
+):
+    waveform_np = waveform.squeeze().numpy()
+    return timeit.timeit(
+        stmt='''
+librosa.resample(
+    waveform_np,
+    orig_sr=sample_rate,
+    target_sr=resample_rate,
+    res_type=res_type,
+)
+        ''',
+        setup='import librosa',
+        number=iters,
+        globals=locals(),
+    ) * 1000 / iters
-for label in configs:
+######################################################################
+#
+def benchmark(sample_rate, resample_rate):
    times, rows = [], []
-    sample_rate = configs[label][0]
+    waveform = get_sine_sweep(sample_rate).to(torch.float32)
-    resample_rate = configs[label][1]
-    waveform = get_sine_sweep(sample_rate)
+    args = (waveform, sample_rate, resample_rate)
    # sinc 64 zero-crossings
-    f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=64)
+    f_time = benchmark_resample_functional(*args, lowpass_filter_width=64)
-    t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=64)
+    t_time = benchmark_resample_transforms(*args, lowpass_filter_width=64)
-    times.append([None, 1000 * f_time, 1000 * t_time])
+    times.append([None, f_time, t_time])
    rows.append("sinc (width 64)")
    # sinc 6 zero-crossings
-    f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=16)
+    f_time = benchmark_resample_functional(*args, lowpass_filter_width=16)
-    t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=16)
+    t_time = benchmark_resample_transforms(*args, lowpass_filter_width=16)
-    times.append([None, 1000 * f_time, 1000 * t_time])
+    times.append([None, f_time, t_time])
    rows.append("sinc (width 16)")
    # kaiser best
-    lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_best")
+    kwargs = {
-    f_time = benchmark_resample(
+        "lowpass_filter_width": 64,
-        "functional",
+        "rolloff": 0.9475937167399596,
-        waveform,
+        "resampling_method": "kaiser_window",
-        sample_rate,
+        "beta": 14.769656459379492,
-        resample_rate,
+    }
-        lowpass_filter_width=64,
+    lib_time = benchmark_resample_librosa(*args, res_type="kaiser_best")
-        rolloff=0.9475937167399596,
+    f_time = benchmark_resample_functional(*args, **kwargs)
-        resampling_method="kaiser_window",
+    t_time = benchmark_resample_transforms(*args, **kwargs)
-        beta=14.769656459379492,
+    times.append([lib_time, f_time, t_time])
-    )
-    t_time = benchmark_resample(
-        "transforms",
-        waveform,
-        sample_rate,
-        resample_rate,
-        lowpass_filter_width=64,
-        rolloff=0.9475937167399596,
-        resampling_method="kaiser_window",
-        beta=14.769656459379492,
-    )
-    times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time])
    rows.append("kaiser_best")
    # kaiser fast
-    lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_fast")
+    kwargs = {
-    f_time = benchmark_resample(
+        "lowpass_filter_width": 16,
-        "functional",
+        "rolloff": 0.85,
-        waveform,
+        "resampling_method": "kaiser_window",
-        sample_rate,
+        "beta": 8.555504641634386,
-        resample_rate,
+    }
-        lowpass_filter_width=16,
+    lib_time = benchmark_resample_librosa(*args, res_type="kaiser_fast")
-        rolloff=0.85,
+    f_time = benchmark_resample_functional(*args, **kwargs)
-        resampling_method="kaiser_window",
+    t_time = benchmark_resample_transforms(*args, **kwargs)
-        beta=8.555504641634386,
+    times.append([lib_time, f_time, t_time])
-    )
-    t_time = benchmark_resample(
-        "transforms",
-        waveform,
-        sample_rate,
-        resample_rate,
-        lowpass_filter_width=16,
-        rolloff=0.85,
-        resampling_method="kaiser_window",
-        beta=8.555504641634386,
-    )
-    times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time])
    rows.append("kaiser_fast")
    df = pd.DataFrame(times, columns=["librosa", "functional", "transforms"], index=rows)
-    df.columns = pd.MultiIndex.from_product([[f"{label} time (ms)"], df.columns])
+    return df
+######################################################################
+#
+def plot(df):
+    print(df.round(2))
+    ax = df.plot(kind="bar")
+    plt.ylabel("Time Elapsed [ms]")
+    plt.xticks(rotation = 0, fontsize=10)
+    for cont, col, color in zip(ax.containers, df.columns, mcolors.TABLEAU_COLORS):
+        label = ["N/A" if v != v else str(v) for v in df[col].round(2)]
+        ax.bar_label(cont, labels=label, color=color, fontweight="bold", fontsize="x-small")
+######################################################################
+#
+# Downsample (48 -> 44.1 kHz)
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+df = benchmark(48_000, 44_100)
+plot(df)
+######################################################################
+#
+# Downsample (16 -> 8 kHz)
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+df = benchmark(16_000, 8_000)
+plot(df)
+######################################################################
+#
+# Upsample (44.1 -> 48 kHz)
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+df = benchmark(44_100, 48_000)
+plot(df)
+######################################################################
+#
+# Upsample (8 -> 16 kHz)
+# ~~~~~~~~~~~~~~~~~~~~~~
+df = benchmark(8_000, 16_000)
+plot(df)
-    print(f"torchaudio: {torchaudio.__version__}")
+######################################################################
-    print(f"librosa: {librosa.__version__}")
+#
-    display(df.round(2))
+# Summary
+# ~~~~~~~
+#
+# To elaborate on the results:
+#
+# - a larger ``lowpass_filter_width`` results in a larger resampling kernel,
+#   and therefore increases computation time for both the kernel computation
+#   and convolution
+# - using ``kaiser_window`` results in longer computation times than the default
+#   ``sinc_interpolation`` because it is more complex to compute the intermediate
+#   window values
+# - a large GCD between the sample and resample rate will result
+#   in a simplification that allows for a smaller kernel and faster kernel computation.
+#