# Prediction interface for Cog ⚙️
# https://cog.run/python

import os
import subprocess
import time
import json
from cog import BasePredictor, Input, Path
from diffusers import AutoencoderOobleck
import soundfile as sf
from safetensors.torch import load_file
from huggingface_hub import snapshot_download
from tangoflux.model import TangoFlux
from tangoflux import TangoFluxInference

MODEL_CACHE = "model_cache"
MODEL_URL = (
    "https://weights.replicate.delivery/default/declare-lab/TangoFlux/model_cache.tar"
)


class CachedTangoFluxInference(TangoFluxInference):
    """TangoFluxInference that loads pre-fetched weights from a local
    replicate.delivery cache for faster cold boots."""

    def __init__(self, name="declare-lab/TangoFlux", device="cuda", cached_paths=None):
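        # Prefer a pre-downloaded local cache; otherwise pull the repo from the Hugging Face Hub.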
        if cached_paths:
            paths = cached_paths
        else:
            paths = snapshot_download(repo_id=name)

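        # Instantiate the Oobleck VAE and restore its weights from the cached safetensors file.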
        self.vae = AutoencoderOobleck()
        vae_weights = load_file(f"{paths}/vae.safetensors")
        self.vae.load_state_dict(vae_weights)
        weights = load_file(f"{paths}/tangoflux.safetensors")

        with open(f"{paths}/config.json", "r") as f:
            config = json.load(f)
        self.model = TangoFlux(config)
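        # strict=False tolerates keys missing from the checkpoint (assumption: e.g. the
        # text encoder, which TangoFlux loads separately from pretrained weights).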
        self.model.load_state_dict(weights, strict=False)
        self.vae.to(device)
        self.model.to(device)


def download_weights(url, dest):
    start = time.time()
    print("downloading url: ", url)
    print("downloading to: ", dest)
    subprocess.check_call(["pget", "-x", url, dest], close_fds=False)
    print("downloading took: ", time.time() - start)


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""

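        # Fetch and extract the weights tarball on first boot; later boots reuse the cache.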
        if not os.path.exists(MODEL_CACHE):
            print("downloading")
            download_weights(MODEL_URL, MODEL_CACHE)

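        # The tarball unpacks to model_cache/declare-lab/TangoFlux, containing
        # config.json, vae.safetensors, and tangoflux.safetensors (see the loader above).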
        self.model = CachedTangoFluxInference(
            cached_paths=f"{MODEL_CACHE}/declare-lab/TangoFlux"
        )

    def predict(
        self,
        prompt: str = Input(
            description="Input prompt", default="Hammer slowly hitting the wooden table"
        ),
        duration: int = Input(
            description="Duration of the output audio in seconds", default=10
        ),
        steps: int = Input(
            description="Number of inference steps", ge=1, le=200, default=25
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", ge=1, le=20, default=4.5
        ),
    ) -> Path:
        """Run a single prediction on the model"""

        audio = self.model.generate(
            prompt,
            steps=steps,
            guidance_scale=guidance_scale,
            duration=duration,
        )
        audio_numpy = audio.numpy()
        out_path = "/tmp/out.wav"

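        # soundfile expects (frames, channels), so transpose the (channels, frames) array;
        # the sampling rate comes from the VAE config.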
        sf.write(
            out_path, audio_numpy.T, samplerate=self.model.vae.config.sampling_rate
        )
        return Path(out_path)
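

# Local smoke test (assumption: run outside Cog, with the weights already in model_cache):
#   predictor = Predictor()
#   predictor.setup()
#   print(predictor.predict(prompt="Rain on a tin roof", duration=5,
#                           steps=25, guidance_scale=4.5))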