test_whisper.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Integration tests for Whisper models with LoRA adapters.

These tests verify that Whisper models can correctly load and use LoRA adapters
for speech-to-text transcription tasks.
"""

import pytest

import vllm
from vllm.assets.audio import AudioAsset
from vllm.lora.request import LoRARequest

from ..utils import create_new_process_for_each_test

# Model configuration
WHISPER_MODEL = "openai/whisper-small"

# Test prompts for Whisper transcription
WHISPER_PROMPT = "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"

# Note: whisper_lora_files fixture is defined in conftest.py


@pytest.fixture(autouse=True)
def use_spawn_for_whisper(monkeypatch):
    """Whisper has issues with forked workers, use spawn instead."""
    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")


def create_whisper_llm(enable_lora: bool = True, max_loras: int = 2):
    """Create a Whisper LLM instance with optional LoRA support."""
    return vllm.LLM(
        model=WHISPER_MODEL,
        enable_lora=enable_lora,
        max_loras=max_loras if enable_lora else 1,
        max_lora_rank=64,
        max_model_len=448,
        dtype="half",
        enforce_eager=True,  # For stability in tests
    )


def run_whisper_inference(
    llm: vllm.LLM,
    lora_path: str | None = None,
    lora_id: int = 1,
) -> list[str]:
    """Run Whisper inference with optional LoRA adapter."""
    # Load test audio
    audio_asset = AudioAsset("mary_had_lamb")
    audio_data = audio_asset.audio_and_sample_rate

    inputs = [
        {
            "prompt": WHISPER_PROMPT,
            "multi_modal_data": {"audio": audio_data},
        }
    ]

    sampling_params = vllm.SamplingParams(
        temperature=0,
        max_tokens=200,
    )

    # Prepare LoRA request if adapter path is provided
    lora_request = None
    if lora_path:
        lora_request = LoRARequest(
            lora_name=f"whisper_lora_{lora_id}",
            lora_int_id=lora_id,
            lora_path=lora_path,
        )

    outputs = llm.generate(inputs, sampling_params, lora_request=lora_request)

    return [output.outputs[0].text for output in outputs]


@create_new_process_for_each_test()
def test_whisper_lora_inference(whisper_lora_files):
    """Test basic Whisper inference with a LoRA adapter.

    This test verifies that:
    1. Whisper model can be loaded with LoRA support enabled
    2. A LoRA adapter can be applied during inference
    3. The model produces valid transcription output
    """
    llm = create_whisper_llm(enable_lora=True)

    # Run inference with LoRA
    outputs = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=1)

    # Verify we got a non-empty transcription
    assert len(outputs) == 1
    assert len(outputs[0]) > 0, "Expected non-empty transcription output"

    # The output should contain some recognizable words from the audio
    # (Mary had a little lamb)
    print(f"Transcription output: {outputs[0]}")


@create_new_process_for_each_test()
def test_whisper_multi_lora(whisper_lora_files):
    """Test Whisper with multiple LoRA adapter IDs.

    This test verifies that the same LoRA adapter can be loaded with
    different IDs and produce consistent results.
    """
    llm = create_whisper_llm(enable_lora=True, max_loras=4)

    # Test with different LoRA IDs using the same adapter
    outputs_lora1 = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=1)
    outputs_lora2 = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=2)

    # Both should produce valid outputs
    assert len(outputs_lora1[0]) > 0
    assert len(outputs_lora2[0]) > 0

    # Same adapter with different IDs should produce same output
    assert outputs_lora1 == outputs_lora2, (
        f"Expected same outputs for same adapter with different IDs. "
        f"Got: {outputs_lora1} vs {outputs_lora2}"
    )


@create_new_process_for_each_test()
def test_whisper_with_and_without_lora(whisper_lora_files):
    """Test that Whisper produces different outputs with and without LoRA.

    This test verifies that the LoRA adapter actually affects the model output.
    """
    llm = create_whisper_llm(enable_lora=True)

    # Run with LoRA
    outputs_with_lora = run_whisper_inference(
        llm, lora_path=whisper_lora_files, lora_id=1
    )

    # Run without LoRA (base model only)
    outputs_without_lora = run_whisper_inference(llm, lora_path=None)

    # Both should produce valid outputs
    assert len(outputs_with_lora[0]) > 0
    assert len(outputs_without_lora[0]) > 0

    print(f"Output with LoRA: {outputs_with_lora[0]}")
    print(f"Output without LoRA: {outputs_without_lora[0]}")

    # Note: Outputs may or may not differ depending on the adapter
    # The main verification is that both configurations work