"vllm/vscode:/vscode.git/clone" did not exist on "2c1de3fa60acc59a79f6207ff68c27e0df6fcb1c"
Commit 38d80967 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.10.2rc2' into v0.10.2rc2-ori

parents 33650733 880c741b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.models.utils import GenerateModelInfo
from .ppl_utils import wikitext_ppl_test
# Models exercised by this module's perplexity test.
MODELS = [
    GenerateModelInfo("openai-community/gpt2-large"),
]


@pytest.mark.parametrize("model_info", MODELS)
def test_ppl(hf_runner, vllm_runner, model_info: GenerateModelInfo):
    """Run the shared perplexity check for one entry of ``MODELS``.

    All of the work is delegated to ``wikitext_ppl_test``, which receives
    the ``hf_runner`` and ``vllm_runner`` fixtures together with the
    parametrized ``model_info``.
    NOTE(review): judging by the helper's name this compares perplexity on
    WikiText between the two runners -- confirm against ``ppl_utils``.
    """
    wikitext_ppl_test(hf_runner, vllm_runner, model_info)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.models.utils import GenerateModelInfo
from .ppl_utils import wikitext_ppl_test
# Models exercised by this module's perplexity test.
MODELS = [
    GenerateModelInfo("Qwen/Qwen3-0.6B"),
    GenerateModelInfo("Qwen/Qwen3-0.6B-FP8"),
    # Disabled: loading a GPTQ quantized model through transformers
    # requires optimum and gptqmodel.
    # GenerateModelInfo("Qwen/Qwen3-0.6B-GPTQ-Int8"),
]


@pytest.mark.parametrize("model_info", MODELS)
def test_ppl(hf_runner, vllm_runner, model_info: GenerateModelInfo):
    """Run the shared perplexity check for one entry of ``MODELS``.

    All of the work is delegated to ``wikitext_ppl_test``, which receives
    the ``hf_runner`` and ``vllm_runner`` fixtures together with the
    parametrized ``model_info``.
    NOTE(review): judging by the helper's name this compares perplexity on
    WikiText between the two runners -- confirm against ``ppl_utils``.
    """
    wikitext_ppl_test(hf_runner, vllm_runner, model_info)
...@@ -35,10 +35,7 @@ def correctness_test_embed_models(hf_runner, ...@@ -35,10 +35,7 @@ def correctness_test_embed_models(hf_runner,
example_prompts, example_prompts,
vllm_extra_kwargs=None, vllm_extra_kwargs=None,
hf_model_callback=None): hf_model_callback=None):
if not model_info.enable_test: pytest.skip("Debug only, ci prefers to use mteb test.")
# A model family has many models with the same architecture,
# and we don't need to test each one.
pytest.skip("Skipping test.")
# The example_prompts has ending "\n", for example: # The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n" # "Write a short story about a robot that dreams for the first time.\n"
...@@ -62,7 +59,7 @@ def correctness_test_embed_models(hf_runner, ...@@ -62,7 +59,7 @@ def correctness_test_embed_models(hf_runner,
with hf_runner( with hf_runner(
model_info.name, model_info.name,
dtype="float32", dtype=model_info.hf_dtype,
is_sentence_transformer=True, is_sentence_transformer=True,
) as hf_model: ) as hf_model:
......
...@@ -7,7 +7,7 @@ import pytest ...@@ -7,7 +7,7 @@ import pytest
from vllm.config import PoolerConfig from vllm.config import PoolerConfig
from vllm.platforms import current_platform from vllm.platforms import current_platform
from ...utils import check_embeddings_close, check_transformers_version from ...utils import check_embeddings_close
@pytest.mark.parametrize( @pytest.mark.parametrize(
...@@ -27,12 +27,17 @@ from ...utils import check_embeddings_close, check_transformers_version ...@@ -27,12 +27,17 @@ from ...utils import check_embeddings_close, check_transformers_version
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base", pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
marks=[pytest.mark.cpu_model]), marks=[pytest.mark.cpu_model]),
# [Encoder-only] # [Encoder-only]
pytest.param("BAAI/bge-base-en-v1.5", marks=[pytest.mark.core_model]), pytest.param(
"BAAI/bge-base-en-v1.5",
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
pytest.param("sentence-transformers/all-MiniLM-L12-v2"), pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
pytest.param("intfloat/multilingual-e5-small"), pytest.param("intfloat/multilingual-e5-small"),
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
# [Cross-Encoder] # [Cross-Encoder]
pytest.param("sentence-transformers/stsb-roberta-base-v2"), pytest.param(
"sentence-transformers/stsb-roberta-base-v2",
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
], ],
) )
def test_models( def test_models(
...@@ -42,8 +47,6 @@ def test_models( ...@@ -42,8 +47,6 @@ def test_models(
model, model,
monkeypatch, monkeypatch,
) -> None: ) -> None:
if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
check_transformers_version(model, max_transformers_version="4.53.2")
if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm(): if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
# ROCm Triton FA does not currently support sliding window attention # ROCm Triton FA does not currently support sliding window attention
......
...@@ -9,8 +9,10 @@ import mteb ...@@ -9,8 +9,10 @@ import mteb
import numpy as np import numpy as np
import pytest import pytest
import requests import requests
import torch
from tests.models.utils import EmbedModelInfo, RerankModelInfo from tests.models.utils import (EmbedModelInfo, RerankModelInfo,
check_embeddings_close)
# Most embedding models on the STS12 task (See #17175): # Most embedding models on the STS12 task (See #17175):
# - Model implementation and minor changes in tensor dtype # - Model implementation and minor changes in tensor dtype
...@@ -18,7 +20,7 @@ from tests.models.utils import EmbedModelInfo, RerankModelInfo ...@@ -18,7 +20,7 @@ from tests.models.utils import EmbedModelInfo, RerankModelInfo
# - Different model results in differences more than 1e-3 # - Different model results in differences more than 1e-3
# 1e-4 is a good tolerance threshold # 1e-4 is a good tolerance threshold
MTEB_EMBED_TASKS = ["STS12"] MTEB_EMBED_TASKS = ["STS12"]
MTEB_EMBED_TOL = 0.02 MTEB_EMBED_TOL = 1e-4
# See #19344 # See #19344
MTEB_RERANK_TASKS = ["NFCorpus"] MTEB_RERANK_TASKS = ["NFCorpus"]
...@@ -163,15 +165,20 @@ def mteb_test_embed_models(hf_runner, ...@@ -163,15 +165,20 @@ def mteb_test_embed_models(hf_runner,
model_info: EmbedModelInfo, model_info: EmbedModelInfo,
vllm_extra_kwargs=None, vllm_extra_kwargs=None,
hf_model_callback=None, hf_model_callback=None,
atol=MTEB_RERANK_TOL): atol=MTEB_EMBED_TOL):
# A model family has many models with the same architecture,
# and we don't need to test each one.
if not model_info.enable_test: if not model_info.enable_test:
# A model family has many models with the same architecture,
# and we don't need to test each one.
pytest.skip("Skipping test.") pytest.skip("Skipping test.")
# Test embed_dims, isnan and whether to use normalize
example_prompts = ["The chef prepared a delicious meal." * 1000]
# Allow vllm to test using the given dtype, such as float32
vllm_extra_kwargs = vllm_extra_kwargs or {} vllm_extra_kwargs = vllm_extra_kwargs or {}
vllm_extra_kwargs["dtype"] = model_info.dtype vllm_extra_kwargs["dtype"] = model_info.dtype
# Allow vllm to test using hf_overrides
if model_info.hf_overrides is not None: if model_info.hf_overrides is not None:
vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
...@@ -183,8 +190,12 @@ def mteb_test_embed_models(hf_runner, ...@@ -183,8 +190,12 @@ def mteb_test_embed_models(hf_runner,
model_config = vllm_model.llm.llm_engine.model_config model_config = vllm_model.llm.llm_engine.model_config
# Confirm whether vllm is using the correct architecture
if model_info.architecture: if model_info.architecture:
assert model_info.architecture in model_config.architectures assert model_info.architecture in model_config.architectures
# Confirm whether vllm uses the correct default_pooling_type, which
# relates to whether chunked prefill and prefix caching are enabled
assert (model_config._model_info.default_pooling_type == assert (model_config._model_info.default_pooling_type ==
model_info.default_pooling_type) model_info.default_pooling_type)
...@@ -192,22 +203,46 @@ def mteb_test_embed_models(hf_runner, ...@@ -192,22 +203,46 @@ def mteb_test_embed_models(hf_runner,
MTEB_EMBED_TASKS) MTEB_EMBED_TASKS)
vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype
with hf_runner(model_info.name, # Test embed_dims, isnan and whether to use normalize
is_sentence_transformer=True, vllm_outputs = vllm_model.embed(example_prompts,
dtype="float32") as hf_model: truncate_prompt_tokens=-1)
assert not torch.any(torch.isnan(torch.tensor(vllm_outputs)))
if hf_model_callback is not None:
hf_model_callback(hf_model) # Accelerate mteb test by setting
# SentenceTransformers mteb score to a constant
st_main_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS) if model_info.mteb_score is None:
st_dtype = next(hf_model.model.parameters()).dtype with hf_runner(model_info.name,
is_sentence_transformer=True,
dtype=model_info.hf_dtype) as hf_model:
# e.g. setting default parameters for the encode method of hf_runner
if hf_model_callback is not None:
hf_model_callback(hf_model)
st_main_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS)
st_dtype = next(hf_model.model.parameters()).dtype
# Test embed_dims and whether to use normalize
hf_outputs = hf_model.encode(example_prompts)
check_embeddings_close(
embeddings_0_lst=hf_outputs,
embeddings_1_lst=vllm_outputs,
name_0="hf",
name_1="vllm",
tol=1e-2,
)
else:
st_main_score = model_info.mteb_score
st_dtype = "Constant"
print("Model:", model_info.name) print("Model:", model_info.name)
print("VLLM:", vllm_dtype, vllm_main_score) print("VLLM:", vllm_dtype, vllm_main_score)
print("SentenceTransformers:", st_dtype, st_main_score) print("SentenceTransformers:", st_dtype, st_main_score)
print("Difference:", st_main_score - vllm_main_score) print("Difference:", st_main_score - vllm_main_score)
assert st_main_score == pytest.approx(vllm_main_score, abs=atol) # We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert st_main_score - vllm_main_score < atol
def run_mteb_rerank(cross_encoder, tasks, languages): def run_mteb_rerank(cross_encoder, tasks, languages):
...@@ -243,9 +278,12 @@ def run_mteb_rerank(cross_encoder, tasks, languages): ...@@ -243,9 +278,12 @@ def run_mteb_rerank(cross_encoder, tasks, languages):
return main_score return main_score
def mteb_test_rerank_models_hf(hf_runner, model_name, hf_model_callback=None): def mteb_test_rerank_models_hf(hf_runner,
model_name,
hf_dtype="float32",
hf_model_callback=None):
with hf_runner(model_name, is_cross_encoder=True, with hf_runner(model_name, is_cross_encoder=True,
dtype="float32") as hf_model: dtype=hf_dtype) as hf_model:
original_predict = hf_model.predict original_predict = hf_model.predict
...@@ -279,14 +317,16 @@ def mteb_test_rerank_models(hf_runner, ...@@ -279,14 +317,16 @@ def mteb_test_rerank_models(hf_runner,
hf_model_callback=None, hf_model_callback=None,
vllm_mteb_encoder=VllmMtebEncoder, vllm_mteb_encoder=VllmMtebEncoder,
atol=MTEB_RERANK_TOL): atol=MTEB_RERANK_TOL):
# A model family has many models with the same architecture,
# and we don't need to test each one.
if not model_info.enable_test: if not model_info.enable_test:
# A model family has many models with the same architecture,
# and we don't need to test each one.
pytest.skip("Skipping test.") pytest.skip("Skipping test.")
# Allow vllm to test using the given dtype, such as float32
vllm_extra_kwargs = vllm_extra_kwargs or {} vllm_extra_kwargs = vllm_extra_kwargs or {}
vllm_extra_kwargs["dtype"] = model_info.dtype vllm_extra_kwargs["dtype"] = model_info.dtype
# Allow vllm to test using hf_overrides
if model_info.hf_overrides is not None: if model_info.hf_overrides is not None:
vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
...@@ -299,9 +339,15 @@ def mteb_test_rerank_models(hf_runner, ...@@ -299,9 +339,15 @@ def mteb_test_rerank_models(hf_runner,
model_config = vllm_model.llm.llm_engine.model_config model_config = vllm_model.llm.llm_engine.model_config
# Confirm whether vllm is using the correct architecture
if model_info.architecture: if model_info.architecture:
assert (model_info.architecture in model_config.architectures) assert (model_info.architecture in model_config.architectures)
# Score API is only enabled for num_labels == 1
assert model_config.hf_config.num_labels == 1 assert model_config.hf_config.num_labels == 1
# Confirm whether vllm uses the correct default_pooling_type, which
# relates to whether chunked prefill and prefix caching are enabled
assert (model_config._model_info.default_pooling_type == assert (model_config._model_info.default_pooling_type ==
model_info.default_pooling_type) model_info.default_pooling_type)
...@@ -310,12 +356,20 @@ def mteb_test_rerank_models(hf_runner, ...@@ -310,12 +356,20 @@ def mteb_test_rerank_models(hf_runner,
languages=MTEB_RERANK_LANGS) languages=MTEB_RERANK_LANGS)
vllm_dtype = model_config.dtype vllm_dtype = model_config.dtype
st_main_score, st_dtype = mteb_test_rerank_models_hf( # Accelerate mteb test by setting
hf_runner, model_info.name, hf_model_callback) # SentenceTransformers mteb score to a constant
if model_info.mteb_score is None:
st_main_score, st_dtype = mteb_test_rerank_models_hf(
hf_runner, model_info.name, model_info.hf_dtype, hf_model_callback)
else:
st_main_score = model_info.mteb_score
st_dtype = "Constant"
print("Model:", model_info.name) print("Model:", model_info.name)
print("VLLM:", vllm_dtype, vllm_main_score) print("VLLM:", vllm_dtype, vllm_main_score)
print("SentenceTransformers:", st_dtype, st_main_score) print("SentenceTransformers:", st_dtype, st_main_score)
print("Difference:", st_main_score - vllm_main_score) print("Difference:", st_main_score - vllm_main_score)
assert st_main_score == pytest.approx(vllm_main_score, abs=atol) # We are not concerned that the vllm mteb results are better
# than SentenceTransformers, so we only perform one-sided testing.
assert st_main_score - vllm_main_score < atol
...@@ -2,16 +2,19 @@ ...@@ -2,16 +2,19 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo, from tests.models.language.pooling.embed_utils import (
EmbedModelInfo, LASTPoolingEmbedModelInfo, correctness_test_embed_models)
RerankModelInfo) from tests.models.utils import (CLSPoolingEmbedModelInfo,
from .embed_utils import correctness_test_embed_models CLSPoolingRerankModelInfo, EmbedModelInfo,
LASTPoolingEmbedModelInfo, RerankModelInfo)
from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models
MODELS = [ MODELS = [
########## BertModel ########## BertModel
CLSPoolingEmbedModelInfo("BAAI/bge-base-en", CLSPoolingEmbedModelInfo("BAAI/bge-base-en",
architecture="BertModel", architecture="BertModel",
mteb_score=0.779336792,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("BAAI/bge-base-zh", CLSPoolingEmbedModelInfo("BAAI/bge-base-zh",
architecture="BertModel", architecture="BertModel",
...@@ -52,10 +55,12 @@ MODELS = [ ...@@ -52,10 +55,12 @@ MODELS = [
########## XLMRobertaModel ########## XLMRobertaModel
CLSPoolingEmbedModelInfo("BAAI/bge-m3", CLSPoolingEmbedModelInfo("BAAI/bge-m3",
architecture="XLMRobertaModel", architecture="XLMRobertaModel",
mteb_score=0.787343078,
enable_test=True), enable_test=True),
########## Qwen2Model ########## Qwen2Model
LASTPoolingEmbedModelInfo("BAAI/bge-code-v1", LASTPoolingEmbedModelInfo("BAAI/bge-code-v1",
architecture="Qwen2Model", architecture="Qwen2Model",
mteb_score=0.75724465,
dtype="float32", dtype="float32",
enable_test=True), enable_test=True),
] ]
...@@ -65,6 +70,7 @@ RERANK_MODELS = [ ...@@ -65,6 +70,7 @@ RERANK_MODELS = [
CLSPoolingRerankModelInfo( CLSPoolingRerankModelInfo(
"BAAI/bge-reranker-base", "BAAI/bge-reranker-base",
architecture="XLMRobertaForSequenceClassification", architecture="XLMRobertaForSequenceClassification",
mteb_score=0.32398,
enable_test=True), enable_test=True),
CLSPoolingRerankModelInfo( CLSPoolingRerankModelInfo(
"BAAI/bge-reranker-large", "BAAI/bge-reranker-large",
......
...@@ -7,13 +7,14 @@ import pytest ...@@ -7,13 +7,14 @@ import pytest
import torch import torch
from tests.conftest import HfRunner from tests.conftest import HfRunner
from tests.models.language.pooling_mteb_test.mteb_utils import (
from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo VllmMtebEncoder, mteb_test_rerank_models)
from .mteb_utils import VllmMtebEncoder, mteb_test_rerank_models from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo
RERANK_MODELS = [ RERANK_MODELS = [
LASTPoolingRerankModelInfo("BAAI/bge-reranker-v2-gemma", LASTPoolingRerankModelInfo("BAAI/bge-reranker-v2-gemma",
architecture="GemmaForSequenceClassification", architecture="GemmaForSequenceClassification",
mteb_score=0.33757,
hf_overrides={ hf_overrides={
"architectures": "architectures":
["GemmaForSequenceClassification"], ["GemmaForSequenceClassification"],
...@@ -104,7 +105,6 @@ class GemmaMtebEncoder(VllmMtebEncoder): ...@@ -104,7 +105,6 @@ class GemmaMtebEncoder(VllmMtebEncoder):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.prompt = PROMPT
self.query_template = "A: {query}\n" self.query_template = "A: {query}\n"
self.document_template = "B: {doc}\n{prompt}" self.document_template = "B: {doc}\n{prompt}"
...@@ -119,7 +119,7 @@ class GemmaMtebEncoder(VllmMtebEncoder): ...@@ -119,7 +119,7 @@ class GemmaMtebEncoder(VllmMtebEncoder):
_sentences = [] _sentences = []
for query, corpus, prompt in sentences: for query, corpus, prompt in sentences:
query = self.query_template.format(query=query) query = self.query_template.format(query=query)
corpus = self.document_template.format(doc=corpus, prompt=prompt) corpus = self.document_template.format(doc=corpus, prompt=PROMPT)
_sentences.append((query, corpus, prompt)) _sentences.append((query, corpus, prompt))
return super().predict(_sentences, *args, **kwargs) return super().predict(_sentences, *args, **kwargs)
......
...@@ -2,14 +2,17 @@ ...@@ -2,14 +2,17 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
from ...utils import (CLSPoolingRerankModelInfo, LASTPoolingRerankModelInfo, from tests.models.utils import (CLSPoolingRerankModelInfo,
RerankModelInfo) LASTPoolingRerankModelInfo, RerankModelInfo)
from .mteb_utils import mteb_test_rerank_models from .mteb_utils import mteb_test_rerank_models
RERANK_MODELS = [ RERANK_MODELS = [
CLSPoolingRerankModelInfo("cross-encoder/ms-marco-TinyBERT-L-2-v2", CLSPoolingRerankModelInfo("cross-encoder/ms-marco-TinyBERT-L-2-v2",
mteb_score=0.32898,
architecture="BertForSequenceClassification"), architecture="BertForSequenceClassification"),
LASTPoolingRerankModelInfo("tomaarsen/Qwen3-Reranker-0.6B-seq-cls", LASTPoolingRerankModelInfo("tomaarsen/Qwen3-Reranker-0.6B-seq-cls",
mteb_score=0.25736,
architecture="Qwen3ForSequenceClassification") architecture="Qwen3ForSequenceClassification")
] ]
......
...@@ -3,15 +3,18 @@ ...@@ -3,15 +3,18 @@
import pytest import pytest
from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo, from tests.models.language.pooling.embed_utils import (
EmbedModelInfo, LASTPoolingEmbedModelInfo, correctness_test_embed_models)
RerankModelInfo, check_transformers_version) from tests.models.utils import (CLSPoolingEmbedModelInfo,
from .embed_utils import correctness_test_embed_models CLSPoolingRerankModelInfo, EmbedModelInfo,
LASTPoolingEmbedModelInfo, RerankModelInfo)
from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models
MODELS = [ MODELS = [
########## BertModel ########## BertModel
CLSPoolingEmbedModelInfo("thenlper/gte-large", CLSPoolingEmbedModelInfo("thenlper/gte-large",
mteb_score=0.76807651,
architecture="BertModel", architecture="BertModel",
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("thenlper/gte-base", CLSPoolingEmbedModelInfo("thenlper/gte-base",
...@@ -30,28 +33,37 @@ MODELS = [ ...@@ -30,28 +33,37 @@ MODELS = [
architecture="BertModel", architecture="BertModel",
enable_test=False), enable_test=False),
########### NewModel ########### NewModel
# These three architectures are almost the same, but not exactly the same.
# For example,
# - whether to use token_type_embeddings
# - whether to use context expansion
# So only test one (the most widely used) model
CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-multilingual-base", CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-multilingual-base",
architecture="GteNewModel", architecture="GteNewModel",
mteb_score=0.775074696,
hf_overrides={"architectures": ["GteNewModel"]}, hf_overrides={"architectures": ["GteNewModel"]},
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-base-en-v1.5", CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-base-en-v1.5",
architecture="GteNewModel", architecture="GteNewModel",
hf_overrides={"architectures": ["GteNewModel"]}, hf_overrides={"architectures": ["GteNewModel"]},
enable_test=True), enable_test=False),
CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-large-en-v1.5", CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-large-en-v1.5",
architecture="GteNewModel", architecture="GteNewModel",
hf_overrides={"architectures": ["GteNewModel"]}, hf_overrides={"architectures": ["GteNewModel"]},
enable_test=True), enable_test=False),
########### Qwen2ForCausalLM ########### Qwen2ForCausalLM
LASTPoolingEmbedModelInfo("Alibaba-NLP/gte-Qwen2-1.5B-instruct", LASTPoolingEmbedModelInfo("Alibaba-NLP/gte-Qwen2-1.5B-instruct",
mteb_score=0.758473459018872,
architecture="Qwen2ForCausalLM", architecture="Qwen2ForCausalLM",
enable_test=True), enable_test=True),
########## ModernBertModel ########## ModernBertModel
CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-modernbert-base", CLSPoolingEmbedModelInfo("Alibaba-NLP/gte-modernbert-base",
mteb_score=0.748193353,
architecture="ModernBertModel", architecture="ModernBertModel",
enable_test=True), enable_test=True),
########## Qwen3ForCausalLM ########## Qwen3ForCausalLM
LASTPoolingEmbedModelInfo("Qwen/Qwen3-Embedding-0.6B", LASTPoolingEmbedModelInfo("Qwen/Qwen3-Embedding-0.6B",
mteb_score=0.771163695,
architecture="Qwen3ForCausalLM", architecture="Qwen3ForCausalLM",
dtype="float32", dtype="float32",
enable_test=True), enable_test=True),
...@@ -65,10 +77,12 @@ RERANK_MODELS = [ ...@@ -65,10 +77,12 @@ RERANK_MODELS = [
CLSPoolingRerankModelInfo( CLSPoolingRerankModelInfo(
# classifier_pooling: mean # classifier_pooling: mean
"Alibaba-NLP/gte-reranker-modernbert-base", "Alibaba-NLP/gte-reranker-modernbert-base",
mteb_score=0.33386,
architecture="ModernBertForSequenceClassification", architecture="ModernBertForSequenceClassification",
enable_test=True), enable_test=True),
CLSPoolingRerankModelInfo( CLSPoolingRerankModelInfo(
"Alibaba-NLP/gte-multilingual-reranker-base", "Alibaba-NLP/gte-multilingual-reranker-base",
mteb_score=0.33062,
architecture="GteNewForSequenceClassification", architecture="GteNewForSequenceClassification",
hf_overrides={"architectures": ["GteNewForSequenceClassification"]}, hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
enable_test=True), enable_test=True),
...@@ -78,10 +92,6 @@ RERANK_MODELS = [ ...@@ -78,10 +92,6 @@ RERANK_MODELS = [
@pytest.mark.parametrize("model_info", MODELS) @pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner, def test_embed_models_mteb(hf_runner, vllm_runner,
model_info: EmbedModelInfo) -> None: model_info: EmbedModelInfo) -> None:
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
check_transformers_version(model_info.name,
max_transformers_version="4.53.2")
mteb_test_embed_models(hf_runner, vllm_runner, model_info) mteb_test_embed_models(hf_runner, vllm_runner, model_info)
...@@ -89,10 +99,6 @@ def test_embed_models_mteb(hf_runner, vllm_runner, ...@@ -89,10 +99,6 @@ def test_embed_models_mteb(hf_runner, vllm_runner,
def test_embed_models_correctness(hf_runner, vllm_runner, def test_embed_models_correctness(hf_runner, vllm_runner,
model_info: EmbedModelInfo, model_info: EmbedModelInfo,
example_prompts) -> None: example_prompts) -> None:
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
check_transformers_version(model_info.name,
max_transformers_version="4.53.2")
correctness_test_embed_models(hf_runner, vllm_runner, model_info, correctness_test_embed_models(hf_runner, vllm_runner, model_info,
example_prompts) example_prompts)
......
...@@ -2,14 +2,17 @@ ...@@ -2,14 +2,17 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo from tests.models.language.pooling.embed_utils import (
from .embed_utils import correctness_test_embed_models correctness_test_embed_models)
from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo
from .mteb_utils import mteb_test_embed_models from .mteb_utils import mteb_test_embed_models
MODELS = [ MODELS = [
########## BertModel ########## BertModel
CLSPoolingEmbedModelInfo("intfloat/e5-small", CLSPoolingEmbedModelInfo("intfloat/e5-small",
architecture="BertModel", architecture="BertModel",
mteb_score=0.742285423,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("intfloat/e5-base", CLSPoolingEmbedModelInfo("intfloat/e5-base",
architecture="BertModel", architecture="BertModel",
...@@ -23,6 +26,7 @@ MODELS = [ ...@@ -23,6 +26,7 @@ MODELS = [
########## XLMRobertaModel ########## XLMRobertaModel
CLSPoolingEmbedModelInfo("intfloat/multilingual-e5-base", CLSPoolingEmbedModelInfo("intfloat/multilingual-e5-base",
architecture="XLMRobertaModel", architecture="XLMRobertaModel",
mteb_score=0.779325955,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("intfloat/multilingual-e5-large", CLSPoolingEmbedModelInfo("intfloat/multilingual-e5-large",
architecture="XLMRobertaModel", architecture="XLMRobertaModel",
...@@ -36,7 +40,7 @@ MODELS = [ ...@@ -36,7 +40,7 @@ MODELS = [
@pytest.mark.parametrize("model_info", MODELS) @pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner, def test_embed_models_mteb(hf_runner, vllm_runner,
model_info: EmbedModelInfo) -> None: model_info: EmbedModelInfo) -> None:
mteb_test_embed_models(hf_runner, vllm_runner, model_info, atol=0.02) mteb_test_embed_models(hf_runner, vllm_runner, model_info)
@pytest.mark.parametrize("model_info", MODELS) @pytest.mark.parametrize("model_info", MODELS)
......
...@@ -4,16 +4,18 @@ from functools import partial ...@@ -4,16 +4,18 @@ from functools import partial
import pytest import pytest
from tests.models.language.pooling.embed_utils import (
check_embeddings_close, correctness_test_embed_models, matryoshka_fy)
from tests.models.utils import (CLSPoolingEmbedModelInfo,
CLSPoolingRerankModelInfo, EmbedModelInfo,
RerankModelInfo)
from vllm import PoolingParams from vllm import PoolingParams
from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo,
EmbedModelInfo, RerankModelInfo)
from .embed_utils import (check_embeddings_close,
correctness_test_embed_models, matryoshka_fy)
from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models
EMBEDDING_MODELS = [ EMBEDDING_MODELS = [
CLSPoolingEmbedModelInfo("jinaai/jina-embeddings-v3", CLSPoolingEmbedModelInfo("jinaai/jina-embeddings-v3",
mteb_score=0.824413164,
architecture="XLMRobertaModel", architecture="XLMRobertaModel",
is_matryoshka=True) is_matryoshka=True)
] ]
...@@ -21,6 +23,7 @@ EMBEDDING_MODELS = [ ...@@ -21,6 +23,7 @@ EMBEDDING_MODELS = [
RERANK_MODELS = [ RERANK_MODELS = [
CLSPoolingRerankModelInfo( CLSPoolingRerankModelInfo(
"jinaai/jina-reranker-v2-base-multilingual", "jinaai/jina-reranker-v2-base-multilingual",
mteb_score=0.33643,
architecture="XLMRobertaForSequenceClassification") architecture="XLMRobertaForSequenceClassification")
] ]
......
...@@ -6,8 +6,8 @@ import pytest ...@@ -6,8 +6,8 @@ import pytest
import torch import torch
from tests.conftest import HfRunner from tests.conftest import HfRunner
from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo
from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo
from .mteb_utils import mteb_test_rerank_models from .mteb_utils import mteb_test_rerank_models
mxbai_rerank_hf_overrides = { mxbai_rerank_hf_overrides = {
...@@ -20,6 +20,7 @@ RERANK_MODELS = [ ...@@ -20,6 +20,7 @@ RERANK_MODELS = [
LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-base-v2", LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-base-v2",
architecture="Qwen2ForSequenceClassification", architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides, hf_overrides=mxbai_rerank_hf_overrides,
mteb_score=0.273,
enable_test=True), enable_test=True),
LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-large-v2", LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-large-v2",
architecture="Qwen2ForSequenceClassification", architecture="Qwen2ForSequenceClassification",
......
...@@ -3,13 +3,16 @@ ...@@ -3,13 +3,16 @@
import pytest import pytest
from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo from tests.models.language.pooling.embed_utils import (
from .embed_utils import correctness_test_embed_models correctness_test_embed_models)
from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo
from .mteb_utils import mteb_test_embed_models from .mteb_utils import mteb_test_embed_models
MODELS = [ MODELS = [
CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v1", CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v1",
architecture="NomicBertModel", architecture="NomicBertModel",
mteb_score=0.737568559,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v1.5", CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v1.5",
architecture="NomicBertModel", architecture="NomicBertModel",
...@@ -19,6 +22,7 @@ MODELS = [ ...@@ -19,6 +22,7 @@ MODELS = [
enable_test=False), enable_test=False),
CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe", CLSPoolingEmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe",
architecture="NomicBertModel", architecture="NomicBertModel",
mteb_score=0.715488912,
enable_test=True) enable_test=True)
] ]
......
...@@ -6,9 +6,9 @@ import pytest ...@@ -6,9 +6,9 @@ import pytest
import torch import torch
from tests.conftest import HfRunner from tests.conftest import HfRunner
from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo
from tests.utils import multi_gpu_test from tests.utils import multi_gpu_test
from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo
from .mteb_utils import mteb_test_rerank_models from .mteb_utils import mteb_test_rerank_models
qwen3_reranker_hf_overrides = { qwen3_reranker_hf_overrides = {
...@@ -20,6 +20,7 @@ qwen3_reranker_hf_overrides = { ...@@ -20,6 +20,7 @@ qwen3_reranker_hf_overrides = {
RERANK_MODELS = [ RERANK_MODELS = [
LASTPoolingRerankModelInfo("Qwen/Qwen3-Reranker-0.6B", LASTPoolingRerankModelInfo("Qwen/Qwen3-Reranker-0.6B",
architecture="Qwen3ForSequenceClassification", architecture="Qwen3ForSequenceClassification",
mteb_score=0.25736,
hf_overrides=qwen3_reranker_hf_overrides, hf_overrides=qwen3_reranker_hf_overrides,
enable_test=True), enable_test=True),
LASTPoolingRerankModelInfo("Qwen/Qwen3-Reranker-4B", LASTPoolingRerankModelInfo("Qwen/Qwen3-Reranker-4B",
......
...@@ -3,14 +3,17 @@ ...@@ -3,14 +3,17 @@
import pytest import pytest
from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo from tests.models.language.pooling.embed_utils import (
from .embed_utils import correctness_test_embed_models correctness_test_embed_models)
from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo
from .mteb_utils import mteb_test_embed_models from .mteb_utils import mteb_test_embed_models
MODELS = [ MODELS = [
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-xs", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-xs",
is_matryoshka=False, is_matryoshka=False,
architecture="BertModel", architecture="BertModel",
mteb_score=0.714927797,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-s", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-s",
is_matryoshka=False, is_matryoshka=False,
...@@ -23,6 +26,7 @@ MODELS = [ ...@@ -23,6 +26,7 @@ MODELS = [
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-long", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-long",
is_matryoshka=False, is_matryoshka=False,
architecture="NomicBertModel", architecture="NomicBertModel",
mteb_score=0.681146831,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-l", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-l",
is_matryoshka=False, is_matryoshka=False,
...@@ -31,14 +35,17 @@ MODELS = [ ...@@ -31,14 +35,17 @@ MODELS = [
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5",
is_matryoshka=True, is_matryoshka=True,
architecture="BertModel", architecture="BertModel",
mteb_score=0.649088363,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-l-v2.0", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-l-v2.0",
is_matryoshka=True, is_matryoshka=True,
architecture="XLMRobertaModel", architecture="XLMRobertaModel",
mteb_score=0.712258299,
enable_test=True), enable_test=True),
CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v2.0", CLSPoolingEmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
is_matryoshka=True, is_matryoshka=True,
architecture="GteModel", architecture="GteModel",
mteb_score=0.706622444,
enable_test=True), enable_test=True),
] ]
...@@ -46,7 +53,7 @@ MODELS = [ ...@@ -46,7 +53,7 @@ MODELS = [
@pytest.mark.parametrize("model_info", MODELS) @pytest.mark.parametrize("model_info", MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner, def test_embed_models_mteb(hf_runner, vllm_runner,
model_info: EmbedModelInfo) -> None: model_info: EmbedModelInfo) -> None:
mteb_test_embed_models(hf_runner, vllm_runner, model_info, atol=0.02) mteb_test_embed_models(hf_runner, vllm_runner, model_info)
@pytest.mark.parametrize("model_info", MODELS) @pytest.mark.parametrize("model_info", MODELS)
......
...@@ -2,7 +2,9 @@ ...@@ -2,7 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo from tests.models.utils import (CLSPoolingEmbedModelInfo, EmbedModelInfo,
LASTPoolingEmbedModelInfo)
from .mteb_utils import mteb_test_embed_models from .mteb_utils import mteb_test_embed_models
# ST models with projector (Dense) layers # ST models with projector (Dense) layers
...@@ -10,8 +12,13 @@ ST_PROJECTOR_MODELS = [ ...@@ -10,8 +12,13 @@ ST_PROJECTOR_MODELS = [
CLSPoolingEmbedModelInfo( CLSPoolingEmbedModelInfo(
"TencentBAC/Conan-embedding-v1", "TencentBAC/Conan-embedding-v1",
architecture="BertModel", architecture="BertModel",
mteb_score=0.688611955,
enable_test=True, enable_test=True,
), ),
LASTPoolingEmbedModelInfo("google/embeddinggemma-300m",
architecture="Gemma3TextModel",
mteb_score=0.7473819294684156,
enable_test=True)
] ]
......
...@@ -29,10 +29,10 @@ MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" ...@@ -29,10 +29,10 @@ MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID] MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
IMG_URLS = [ IMG_URLS = [
"https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/237-400x300.jpg", "237-400x300.jpg", # "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/237-400x300.jpg",
"https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/231-200x300.jpg", "231-200x300.jpg", # "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/231-200x300.jpg",
"https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/27-500x500.jpg", "27-500x500.jpg", # "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/27-500x500.jpg",
"https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/17-150x600.jpg", "17-150x600.jpg", # "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/17-150x600.jpg",
] ]
PROMPT = "Describe each image in one short sentence." PROMPT = "Describe each image in one short sentence."
...@@ -105,12 +105,6 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt: ...@@ -105,12 +105,6 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
return engine_inputs return engine_inputs
MSGS = [
_create_msg_format(IMG_URLS[:1]),
_create_msg_format(IMG_URLS[:2]),
_create_msg_format(IMG_URLS),
]
SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5)
LIMIT_MM_PER_PROMPT = dict(image=4) LIMIT_MM_PER_PROMPT = dict(image=4)
...@@ -156,12 +150,8 @@ def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs: ...@@ -156,12 +150,8 @@ def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs:
@pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN) @pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN)
@pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("dtype", ["bfloat16"])
def test_chat( def test_chat(vllm_runner, max_model_len: int, model: str, dtype: str,
vllm_runner, local_asset_server) -> None:
max_model_len: int,
model: str,
dtype: str,
) -> None:
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs( EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
FIXTURE_LOGPROBS_CHAT[model]) FIXTURE_LOGPROBS_CHAT[model])
with vllm_runner( with vllm_runner(
...@@ -174,7 +164,14 @@ def test_chat( ...@@ -174,7 +164,14 @@ def test_chat(
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT, limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
) as vllm_model: ) as vllm_model:
outputs = [] outputs = []
for msg in MSGS:
urls_all = [local_asset_server.url_for(u) for u in IMG_URLS]
msgs = [
_create_msg_format(urls_all[:1]),
_create_msg_format(urls_all[:2]),
_create_msg_format(urls_all),
]
for msg in msgs:
output = vllm_model.llm.chat(msg, sampling_params=SAMPLING_PARAMS) output = vllm_model.llm.chat(msg, sampling_params=SAMPLING_PARAMS)
outputs.extend(output) outputs.extend(output)
...@@ -190,17 +187,19 @@ def test_chat( ...@@ -190,17 +187,19 @@ def test_chat(
name_1="output") name_1="output")
@pytest.mark.parametrize("prompt,expected_ranges", @pytest.mark.parametrize(
[(_create_engine_inputs_hf(IMG_URLS[:1]), "image_urls,expected_ranges",
[PlaceholderRange(offset=11, length=494)]), [(IMG_URLS[:1], [PlaceholderRange(offset=11, length=494)]),
(_create_engine_inputs_hf(IMG_URLS[1:4]), [ (IMG_URLS[1:4], [
PlaceholderRange(offset=11, length=266), PlaceholderRange(offset=11, length=266),
PlaceholderRange(offset=277, length=1056), PlaceholderRange(offset=277, length=1056),
PlaceholderRange(offset=1333, length=418) PlaceholderRange(offset=1333, length=418)
])]) ])])
def test_multi_modal_placeholders(vllm_runner, prompt: TextPrompt, def test_multi_modal_placeholders(vllm_runner, image_urls: list[str],
expected_ranges: list[PlaceholderRange], expected_ranges: list[PlaceholderRange],
monkeypatch) -> None: local_asset_server, monkeypatch) -> None:
local_image_urls = [local_asset_server.url_for(u) for u in image_urls]
prompt = _create_engine_inputs_hf(local_image_urls)
# This placeholder checking test only works with V0 engine # This placeholder checking test only works with V0 engine
# where `multi_modal_placeholders` is returned with `RequestOutput` # where `multi_modal_placeholders` is returned with `RequestOutput`
......
...@@ -154,7 +154,7 @@ def batch_make_image_embeddings( ...@@ -154,7 +154,7 @@ def batch_make_image_embeddings(
embed_counter += cur_batch_embed_len embed_counter += cur_batch_embed_len
image_counter += cur_batch_image_count image_counter += cur_batch_image_count
# ensure we don't lost any images or embeddings # ensure we don't lose any images or embeddings
assert embed_counter == image_embeds.size(0) assert embed_counter == image_embeds.size(0)
assert image_counter == image_grid_thw.size(0) assert image_counter == image_grid_thw.size(0)
assert len(image_batches) == len(result) assert len(image_batches) == len(result)
...@@ -238,7 +238,7 @@ def batch_make_video_embeddings( ...@@ -238,7 +238,7 @@ def batch_make_video_embeddings(
embed_counter += cur_batch_embed_len embed_counter += cur_batch_embed_len
video_counter += cur_batch_video_count video_counter += cur_batch_video_count
# ensure we don't lost any videos or embeddings # ensure we don't lose any videos or embeddings
assert embed_counter == video_embeds.size(0) assert embed_counter == video_embeds.size(0)
assert video_counter == video_grid_thw.size(0) assert video_counter == video_grid_thw.size(0)
assert len(video_batches) == len(result) assert len(video_batches) == len(result)
......
...@@ -122,8 +122,7 @@ def run_test( ...@@ -122,8 +122,7 @@ def run_test(
@pytest.mark.core_model @pytest.mark.core_model
@pytest.mark.parametrize( @pytest.mark.parametrize("model", ["openai/whisper-large-v3-turbo"])
"model", ["openai/whisper-small", "openai/whisper-large-v3-turbo"])
@create_new_process_for_each_test() @create_new_process_for_each_test()
def test_models(vllm_runner, model) -> None: def test_models(vllm_runner, model) -> None:
run_test( run_test(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment