Add playback function (#3026)

Summary: Allows user to play audio through the device speaker. Pull Request resolved: https://github.com/pytorch/audio/pull/3026 Test Plan: Created a new test that mocks a call to the write audio chunk method from StreamWriter. To run the test: `pytest test/torchaudio_unittest/io/_playback_test.py` Reviewed By: mthrok Differential Revision: D43082062 Pulled By: jazcarretao fbshipit-source-id: 01a85b32ce925687a633d1208d15d54556e89dd8

Add playback function (#3026)
Summary: Allows user to play audio through the device speaker. Pull Request resolved: https://github.com/pytorch/audio/pull/3026 Test Plan: Created a new test that mocks a call to the write audio chunk method from StreamWriter. To run the test: `pytest test/torchaudio_unittest/io/_playback_test.py` Reviewed By: mthrok Differential Revision: D43082062 Pulled By: jazcarretao fbshipit-source-id: 01a85b32ce925687a633d1208d15d54556e89dd8
2ead941e · juan.azcarreta.ortiz · Facebook GitHub Bot · 9368f33b · 2ead941e · 2ead941e
Commit 2ead941e authored Feb 07, 2023 by juan.azcarreta.ortiz Committed by Facebook GitHub Bot Feb 07, 2023
7 changed files
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -646,6 +646,8 @@ jobs:
          environment:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -717,6 +719,8 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -762,6 +766,8 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -800,6 +806,7 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
      - store_test_results:
          path: test-results
      - store_artifacts:

--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -646,6 +646,8 @@ jobs:
          environment:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -717,6 +719,8 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -762,6 +766,8 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
      - store_test_results:
          path: test-results
      - store_artifacts:
@@ -800,6 +806,7 @@ jobs:
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
+              TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
      - store_test_results:
          path: test-results
      - store_artifacts:

--- a/test/torchaudio_unittest/common_utils/__init__.py
+++ b/test/torchaudio_unittest/common_utils/__init__.py
@@ -4,11 +4,13 @@ from .case_utils import (
    is_ffmpeg_available,
    PytorchTestCase,
    skipIfCudaSmallMemory,
+    skipIfNoAudioDevice,
    skipIfNoCtcDecoder,
    skipIfNoCuda,
    skipIfNoExec,
    skipIfNoFFmpeg,
    skipIfNoKaldi,
+    skipIfNoMacOS,
    skipIfNoModule,
    skipIfNoQengine,
    skipIfNoSox,
@@ -37,10 +39,12 @@ __all__ = [
    "PytorchTestCase",
    "TorchaudioTestCase",
    "is_ffmpeg_available",
+    "skipIfNoAudioDevice",
    "skipIfNoCtcDecoder",
    "skipIfNoCuda",
    "skipIfCudaSmallMemory",
    "skipIfNoExec",
+    "skipIfNoMacOS",
    "skipIfNoModule",
    "skipIfNoKaldi",
    "skipIfNoSox",

--- a/test/torchaudio_unittest/common_utils/case_utils.py
+++ b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -253,6 +253,16 @@ skipIfPy310 = _skipIf(
    ),
    key="ON_PYTHON_310",
 )
+skipIfNoAudioDevice = _skipIf(
+    not torchaudio.utils.ffmpeg_utils.get_output_devices(),
+    reason="No output audio device is available.",
+    key="NO_AUDIO_OUT_DEVICE",
+)
+skipIfNoMacOS = _skipIf(
+    sys.platform != "darwin",
+    reason="This feature is only available for MacOS.",
+    key="NO_MACOS",
+)
 def zip_equal(*iterables):

--- a/test/torchaudio_unittest/io/playback_test.py
+++ b/test/torchaudio_unittest/io/playback_test.py
+from unittest.mock import patch
+import torch
+from parameterized import parameterized
+from torchaudio.io import play_audio, StreamWriter
+from torchaudio_unittest.common_utils import get_sinusoid, skipIfNoAudioDevice, skipIfNoMacOS, TorchaudioTestCase
+@skipIfNoAudioDevice
+@skipIfNoMacOS
+class PlaybackInterfaceTest(TorchaudioTestCase):
+    @parameterized.expand([("uint8",), ("int16",), ("int32",), ("int64",), ("float32",), ("float64",)])
+    @patch.object(StreamWriter, "write_audio_chunk")
+    def test_playaudio(self, dtype, writeaudio_mock):
+        """Test playaudio function.
+        The patch object is used to check if the data is written
+        to the output device stream, without playing the actual audio.
+        """
+        dtype = getattr(torch, dtype)
+        sample_rate = 8000
+        waveform = get_sinusoid(
+            frequency=440,
+            sample_rate=sample_rate,
+            duration=1,  # seconds
+            n_channels=1,
+            dtype=dtype,
+            device="cpu",
+            channels_first=False,
+        )
+        play_audio(waveform, sample_rate=sample_rate)
+        writeaudio_mock.assert_called()
+    @parameterized.expand(
+        [
+            # Invalid number of dimensions (!= 2)
+            ("int16", 1, "audiotoolbox"),
+            ("int16", 3, "audiotoolbox"),
+            # Invalid tensor type
+            ("complex64", 2, "audiotoolbox"),
+            # Invalid output device
+            ("int16", 2, "audiotool"),
+        ]
+    )
+    @patch.object(StreamWriter, "write_audio_chunk")
+    def test_playaudio_invalid_options(self, dtype, ndim, device, writeaudio_mock):
+        """Test playaudio function raises error with invalid options."""
+        dtype = getattr(torch, dtype)
+        sample_rate = 8000
+        waveform = get_sinusoid(
+            frequency=440,
+            sample_rate=sample_rate,
+            duration=1,  # seconds
+            n_channels=1,
+            dtype=dtype,
+            device="cpu",
+            channels_first=False,
+        ).squeeze()
+        for _ in range(ndim - 1):
+            waveform = waveform.unsqueeze(-1)
+        with self.assertRaises(ValueError):
+            play_audio(waveform, sample_rate=sample_rate, device=device)
--- a/torchaudio/io/__init__.py
+++ b/torchaudio/io/__init__.py
@@ -8,8 +8,12 @@ _STREAM_WRITER = [
    "StreamWriter",
 ]
+_PLAYBACK = [
+    "play_audio",
+]
-_LAZILY_IMPORTED = _STREAM_READER + _STREAM_WRITER
+_LAZILY_IMPORTED = _STREAM_READER + _STREAM_WRITER + _PLAYBACK
 def __getattr__(name: str):
@@ -22,11 +26,16 @@ def __getattr__(name: str):
            item = getattr(_stream_reader, name)
-        else:
+        elif name in _STREAM_WRITER:
            from . import _stream_writer
            item = getattr(_stream_writer, name)
+        elif name in _PLAYBACK:
+            from . import _playback
+            item = getattr(_playback, name)
        globals()[name] = item
        return item
    raise AttributeError(f"module {__name__} has no attribute {name}")

--- a/torchaudio/io/_playback.py
+++ b/torchaudio/io/_playback.py
+import warnings
+from sys import platform
+from typing import Optional
+import torch
+import torchaudio
+from torchaudio.io import StreamWriter
+dict_format = {
+    torch.uint8: "u8",
+    torch.int16: "s16",
+    torch.int32: "s32",
+    torch.int64: "s64",
+    torch.float32: "flt",
+    torch.float64: "dbl",
+}
+def play_audio(
+    waveform: torch.Tensor,
+    sample_rate: Optional[float],
+    device: Optional[str] = None,
+) -> None:
+    """Plays audio through specified or available output device.
+    This function is currently only supported on MacOS, which has access
+    to "audiotoolbox" output device that can play up to two audio channels.
+    Args:
+        waveform: Tensor containing the audio to play.
+            Expected shape: `(time, num_channels)`.
+        sample_rate: Sample rate of the audio to play.
+        device: Output device to use. If None, the default device is used.
+    """
+    if platform == "darwin":
+        device = device or "audiotoolbox"
+        path = "-"
+    else:
+        raise ValueError(f"This function only supports MacOS, but current OS is {platform}")
+    available_devices = list(torchaudio.utils.ffmpeg_utils.get_output_devices().keys())
+    if device not in available_devices:
+        raise ValueError(f"Device {device} is not available. Available devices are: {available_devices}")
+    if waveform.dtype not in dict_format:
+        raise ValueError(f"Unsupported type {waveform.dtype}. The list of supported types is: {dict_format.keys()}")
+    format = dict_format[waveform.dtype]
+    if waveform.ndim != 2:
+        raise ValueError(f"Expected 2D tensor with shape `(time, num_channels)`, got {waveform.ndim}D tensor instead")
+    time, num_channels = waveform.size()
+    if num_channels > 2:
+        warnings.warn(
+            f"Expected up to 2 channels, got {num_channels} channels instead. Only the first 2 channels will be played."
+        )
+    # Write to speaker device
+    s = StreamWriter(dst=path, format=device)
+    s.add_audio_stream(sample_rate, num_channels, format=format)
+    # write audio to the device
+    block_size = 256
+    with s.open():
+        for i in range(0, time, block_size):
+            s.write_audio_chunk(0, waveform[i : i + block_size, :])