test_qwen.py

"""Ensure that a text-only Qwen model can be run without throwing an error.
We explicitly test this because Qwen is implemented as a multimodal and
supports a visual encoder for models like Qwen-VL.
"""
from typing import List, Type

import pytest

from ....conftest import VllmRunner

models = [
    "Qwen/Qwen-7B-Chat"  # Has no visual encoder
]


@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_text_only_qwen_model_can_be_loaded_and_run(
    vllm_runner: Type[VllmRunner],
    example_prompts: List[str],
    model: str,
    *,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
):
    with vllm_runner(model, dtype=dtype) as vllm_model:
        vllm_model.generate_greedy_logprobs(
            example_prompts,
            max_tokens,
            num_logprobs=num_logprobs,
        )