Merge pull request #11 from OIEIEIO/main

Added save audio function to Demo.ipynb

Merge pull request #11 from OIEIEIO/main
Added save audio function to Demo.ipynb
8ec5b6d1 · Soujanya Poria · GitHub · 5120a1c8 · ded20cda · 8ec5b6d1
Unverified Commit 8ec5b6d1 authored Jan 07, 2025 by Soujanya Poria Committed by GitHub Jan 07, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 115 additions and 49 deletions

Demo.ipynb Demo.ipynb +115 -49

No files found.
--- a/Demo.ipynb
+++ b/Demo.ipynb
 {
- "cells": [
+  "cells": [
-  {
+    {
-   "cell_type": "code",
+      "cell_type": "markdown",
-   "execution_count": null,
+      "metadata": {
-   "metadata": {},
+        "id": "view-in-github",
-   "outputs": [],
+        "colab_type": "text"
-   "source": [
+      },
-    "!pip install git+https://github.com/declare-lab/TangoFlux.git"
+      "source": [
-   ]
+        "<a href=\"https://colab.research.google.com/github/OIEIEIO/TangoFlux/blob/main/Demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": true,
+        "id": "xiaRzuzPOP4H"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install git+https://github.com/declare-lab/TangoFlux.git"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": true,
+        "id": "Hfu3zXTDOP4J"
+      },
+      "outputs": [],
+      "source": [
+        "import IPython\n",
+        "import torchaudio\n",
+        "from tangoflux import TangoFluxInference\n",
+        "from IPython.display import Audio\n",
+        "\n",
+        "model = TangoFluxInference(name='declare-lab/TangoFlux')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "oFiak5QIOP4K"
+      },
+      "outputs": [],
+      "source": [
+        "# @title Generate Audio\n",
+        "\n",
+        "prompt = 'a futuristic space craft with unique engine sound' # @param {type:\"string\"}\n",
+        "duration = 10 # @param {type:\"number\"}\n",
+        "steps = 50 # @param {type:\"number\"}\n",
+        "\n",
+        "audio = model.generate(prompt, steps=steps, duration=duration)\n",
+        "\n",
+        "Audio(data=audio, rate=44100)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import IPython\n",
+        "import torchaudio\n",
+        "from tangoflux import TangoFluxInference\n",
+        "from IPython.display import Audio\n",
+        "\n",
+        "model = TangoFluxInference(name='declare-lab/TangoFlux')\n",
+        "\n",
+        "# @title Generate Audio\n",
+        "prompt = 'Melodic human whistling harmonizing with natural birdsong'  # @param {type:\"string\"}\n",
+        "duration = 10  # @param {type:\"number\"}\n",
+        "steps = 50  # @param {type:\"number\"}\n",
+        "\n",
+        "# Generate the audio\n",
+        "audio = model.generate(prompt, steps=steps, duration=duration)\n",
+        "\n",
+        "# Ensure audio is in the correct format (2D Tensor: [channels, samples])\n",
+        "if len(audio.shape) == 1:  # If mono audio (1D tensor)\n",
+        "    audio_tensor = audio.unsqueeze(0)  # Add channel dimension to make it [1, samples]\n",
+        "elif len(audio.shape) == 2:  # Stereo audio (2D tensor)\n",
+        "    audio_tensor = audio  # Already in correct format\n",
+        "else:\n",
+        "    raise ValueError(f\"Unexpected audio tensor shape: {audio.shape}\")\n",
+        "\n",
+        "# Save the audio as a .wav file\n",
+        "torchaudio.save('generated_audio.wav', audio_tensor, sample_rate=44100)\n",
+        "\n",
+        "# Optionally play the audio in the notebook\n",
+        "Audio(data=audio.numpy(), rate=44100)\n"
+      ],
+      "metadata": {
+        "id": "_Z8elHyOHOQ1"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "language_info": {
+      "name": "python"
+    },
+    "colab": {
+      "provenance": [],
+      "machine_shape": "hm",
+      "private_outputs": true,
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
  },
-  {
+  "nbformat": 4,
-   "cell_type": "code",
+  "nbformat_minor": 0
-   "execution_count": null,
+}
-   "metadata": {},
\ No newline at end of file
-   "outputs": [],
-   "source": [
-    "import IPython\n",
-    "import torchaudio\n",
-    "from tangoflux import TangoFluxInference\n",
-    "from IPython.display import Audio\n",
-    "\n",
-    "model = TangoFluxInference(name='declare-lab/TangoFlux')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# @title Generate Audio\n",
-    "\n",
-    "prompt = 'Hammer slowly hitting the wooden table' # @param {type:\"string\"}\n",
-    "duration = 10 # @param {type:\"number\"}\n",
-    "steps = 50 # @param {type:\"number\"}\n",
-    "\n",
-    "audio = model.generate(prompt, steps=steps, duration=duration)\n",
-    "\n",
-    "Audio(data=audio, rate=44100)"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}