Commit 6c046382 (unverified), authored by Harry Mellor, committed by GitHub
Browse files

Fix per file ruff ignores related to line length (#26262)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 91ac7f76
......@@ -947,7 +947,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
{"type": "image_url", "image_url": {"url": image_url}},
{
"type": "text",
"text": "What's in <|image_1|> and how does it compare to the other one?", # noqa: E501
"text": "What's in <|image_1|> and how does it compare to "
"the other one?",
},
],
}
......@@ -960,8 +961,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
assert conversation == [
{
"role": "user",
"content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to the "
"other one?",
"content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to "
"the other one?",
}
]
_assert_mm_data_is_image_input(mm_data, 2)
......@@ -1364,7 +1365,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave(
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None])
def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave( # noqa: E501
def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave(
phi3v_model_config_mm_interleaved,
phi3v_tokenizer,
image_url,
......@@ -1451,14 +1452,14 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
assert conversation == [
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
},
{"role": "assistant", "content": "Some stuff."},
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
},
]
......@@ -1468,7 +1469,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
_assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[None])
def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave( # noqa: E501
def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave(
qwen25omni_model_config_mm_interleaved,
qwen25omni_tokenizer,
image_url,
......@@ -1521,14 +1522,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl
assert conversation == [
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
},
{"role": "assistant", "content": "Some stuff."},
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
},
]
......@@ -1593,14 +1594,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes
assert conversation == [
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
},
{"role": "assistant", "content": "Some stuff."},
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
},
]
......@@ -1661,14 +1662,14 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message
assert conversation == [
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
},
{"role": "assistant", "content": "Some stuff."},
{
"role": "user",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
"And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
"content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
"\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
},
]
......@@ -2193,7 +2194,8 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
assert conversation == [
{
"role": "user",
"content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
"content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the "
"audio say?",
}
]
_assert_mm_data_inputs(mm_data, {"audio": 1})
......@@ -2228,7 +2230,8 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
assert conversation == [
{
"role": "user",
"content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
"content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the "
"audio say?",
}
]
_assert_mm_data_inputs(await mm_future, {"audio": 1})
......
......@@ -165,7 +165,7 @@ def test_env(
# FlashMLA only supports block_size == 64
pytest.skip("FlashMLA only supports block_size 64")
else:
from vllm.v1.attention.backends.mla.flashmla import ( # noqa: E501
from vllm.v1.attention.backends.mla.flashmla import (
is_flashmla_supported,
)
......
......@@ -331,7 +331,8 @@ class WeightTensors:
in_dtype=config.dtype,
quant_dtype=config.quant_dtype,
block_shape=config.quant_block_shape,
per_out_ch_quant=config.is_per_act_token_quant, # or config.is_per_out_ch_quant
# or config.is_per_out_ch_quant
per_out_ch_quant=config.is_per_act_token_quant,
)
return WeightTensors(
w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs
......
......@@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str):
results_df: Optional[pd.DataFrame] = None
for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm(
combinations
): # noqa: E501
):
config = Config(
Ms=[m],
K=k,
......
......@@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
BatchedDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (
BatchedTritonOrDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.config import (
......@@ -196,10 +196,10 @@ register_experts(
# Disable on blackwell for now
if has_deep_ep() and not current_platform.has_device_capability(100):
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)
......@@ -233,7 +233,7 @@ if has_pplx():
)
if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100):
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
FlashInferExperts,
)
from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501
......
......@@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec
from vllm.utils import get_open_port, has_deep_ep
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)
......
......@@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch
from .utils import make_test_weights
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)
......
......@@ -28,10 +28,10 @@ from ...utils import multi_gpu_test
from .parallel_utils import ProcessGroupInfo, parallel_launch
if has_deep_ep():
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
DeepEPHTPrepareAndFinalize,
)
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501
from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
DeepEPLLPrepareAndFinalize,
)
......
......@@ -271,7 +271,7 @@ if __name__ == "__main__":
parser = make_config_arg_parser(
description=(
"Run single prepare-finalize & fused-experts combination test"
"Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " # noqa: E501
"Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "
"--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts"
)
)
......
......@@ -483,8 +483,8 @@ def test_mixtral_moe(
}
if use_rocm_aiter:
# The values of rtol and atol are set based on the tests in ROCM AITER package. # noqa: E501
# https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 # noqa: E501
# The values of rtol and atol are set based on the tests in ROCM AITER package.
# https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174
torch.testing.assert_close(
hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100
)
......
......@@ -10,11 +10,11 @@ import pytest
import torch
from packaging import version
from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501
from vllm.model_executor.layers.quantization.quark.quark import (
QuarkLinearMethod,
QuarkW4A4MXFP4,
)
from vllm.model_executor.layers.quantization.quark.quark_moe import ( # noqa: E501
from vllm.model_executor.layers.quantization.quark.quark_moe import (
QuarkW4A4MXFp4MoEMethod,
)
from vllm.platforms import current_platform
......
......@@ -12,7 +12,7 @@ PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example
EXPECTED_LORA_OUTPUT = [
"SELECT count(*) FROM singer",
"SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'", # noqa: E501
"SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'",
"SELECT name , country , age FROM singer ORDER BY age",
]
......@@ -21,10 +21,16 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"),
PROMPT_TEMPLATE.format(
query="What is the average, minimum, and maximum age of all singers from France?" # noqa: E501
query=(
"What is the average, minimum, and maximum "
"age of all singers from France?"
)
),
PROMPT_TEMPLATE.format(
query="Show name, country, age for all singers ordered by age from the oldest to the youngest." # noqa: E501
query=(
"Show name, country, age for all singers ordered "
"by age from the oldest to the youngest."
)
),
]
sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32)
......
......@@ -15,10 +15,10 @@ MODEL_PATH = "meta-llama/Llama-2-7b-hf"
EXPECTED_LORA_OUTPUT = [
" SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", # noqa: E501
" SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", # noqa: E501
" SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ",
" SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ", # noqa: E501
" SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ", # noqa: E501
" SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", # noqa: E501
" SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ",
" SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' ", # noqa: E501
]
......
......@@ -26,7 +26,7 @@ LORA_RANK = 8
LORA_TEST_PROMPTS = ["What is GitHub?", "Hi, tell me about you"]
LORA_TEST_EXPECTED = [
"GitHub is an open-source platform that provides a way to manage and develop software projects. It allows developers to store and manage code, collaborate on projects, and automate tasks.", # noqa: E501
"I am Alice, an AI assistant developed by GitHub/Charent.", # noqa: E501
"I am Alice, an AI assistant developed by GitHub/Charent.",
]
......
......@@ -16,7 +16,7 @@ def test_dummy_loader(vllm_runner, monkeypatch, model: str) -> None:
) as llm:
if model == "google/gemma-3-4b-it":
normalizers = llm.llm.collective_rpc(
lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item()
lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() # noqa: E501
)
config = llm.llm.llm_engine.model_config.hf_config.text_config
else:
......
......@@ -46,12 +46,13 @@ TOOLS = [
"properties": {
"city": {
"type": "string",
"description": "The city to find the weather for, e.g. 'San Francisco'",
"description": "The city to find the weather for, e.g. "
"'San Francisco'",
},
"state": {
"type": "string",
"description": "the two-letter abbreviation for the state that the city is"
" in, e.g. 'CA' which would mean 'California'",
"description": "the two-letter abbreviation for the state that "
"the city is in, e.g. 'CA' which would mean 'California'",
},
"unit": {
"type": "string",
......@@ -85,7 +86,8 @@ MSGS = [
{"role": "system", "content": "You are an assistant."},
{
"role": "user",
"content": "Could you please rewrite the below article? \n\n My English needs improvving, maybe I make errors.", # noqa
"content": "Could you please rewrite the below article? \n\n My English needs "
"improvving, maybe I make errors.",
},
{
"role": "assistant",
......@@ -96,14 +98,16 @@ MSGS = [
"type": "function",
"function": {
"name": "rewrite",
"arguments": '{"text":"My English needs improvving, maybe I make errors."}', # noqa
"arguments": '{"text":"My English needs improvving, maybe '
'I make errors."}',
},
}
],
},
{
"role": "tool",
"content": '{"action":"rewrite","outcome":"My English needs improving, maybe I make errors."}', # noqa
"content": '{"action":"rewrite","outcome":"My English needs improving, maybe '
'I make errors."}',
"tool_call_id": "bbc5b7ede",
"name": "rewrite",
},
......
......@@ -130,14 +130,14 @@ VLM_TEST_SETTINGS = {
dtype="bfloat16",
marks=[
pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
], # noqa: E501
],
),
"qwen2_5_vl": VLMTestInfo(
models=["Qwen/Qwen2.5-VL-3B-Instruct"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
......@@ -149,8 +149,8 @@ VLM_TEST_SETTINGS = {
models=["Qwen/Qwen2.5-Omni-3B"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",
video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",
max_model_len=4096,
max_num_seqs=2,
num_logprobs=6 if current_platform.is_cpu() else 5,
......@@ -181,7 +181,7 @@ VLM_TEST_SETTINGS = {
max_model_len=16384,
hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
), # noqa: E501
),
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
image_size_factors=[(0.25, 0.5, 1.0)],
......@@ -213,7 +213,7 @@ VLM_TEST_SETTINGS = {
models=["Qwen/Qwen2.5-VL-3B-Instruct"],
test_type=VLMTestType.IMAGE,
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
......@@ -237,10 +237,10 @@ VLM_TEST_SETTINGS = {
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<vlm_image>Please describe the image shortly.",
"cherry_blossom": "<vlm_image>Please infer the season with reason.", # noqa: E501
"cherry_blossom": "<vlm_image>Please infer the season with reason.",
}
),
multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.", # noqa: E501
multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",
stop_str=["<|im_end|>"],
image_size_factors=[(0.10, 0.15)],
max_tokens=64,
......@@ -252,11 +252,11 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>What's the content in the center of the image?", # noqa: E501
"cherry_blossom": "<image>What is the season?", # noqa: E501
"stop_sign": "<image>What's the content in the center of the image?",
"cherry_blossom": "<image>What is the season?",
}
),
multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501
multi_image_prompt="<image><image>Describe the two images in detail.",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
......@@ -268,11 +268,11 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>What's the content in the center of the image?", # noqa: E501
"cherry_blossom": "<image>What is the season?", # noqa: E501
"stop_sign": "<image>What's the content in the center of the image?",
"cherry_blossom": "<image>What is the season?",
}
),
multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501
multi_image_prompt="<image><image>Describe the two images in detail.",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
......@@ -311,14 +311,14 @@ VLM_TEST_SETTINGS = {
max_num_seqs=2,
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
"stop_sign": "<image>\nWhat's the content in the center of the image?",
"cherry_blossom": "<image>\nPlease infer the season with reason in details.", # noqa: E501
}
),
multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"],
image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
),
"fuyu": VLMTestInfo(
......@@ -342,7 +342,7 @@ VLM_TEST_SETTINGS = {
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<start_of_image>What's the content in the center of the image?", # noqa: E501
"cherry_blossom": "<start_of_image>What is the season?", # noqa: E501
"cherry_blossom": "<start_of_image>What is the season?",
}
),
multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
......@@ -356,7 +356,7 @@ VLM_TEST_SETTINGS = {
"glm4v": VLMTestInfo(
models=["zai-org/glm-4v-9b"],
test_type=VLMTestType.IMAGE,
prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501
prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?", # noqa: E501
......@@ -377,9 +377,9 @@ VLM_TEST_SETTINGS = {
"glm4_1v": VLMTestInfo(
models=["zai-org/GLM-4.1V-9B-Thinking"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", # noqa: E501
prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
max_model_len=2048,
max_num_seqs=2,
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
......@@ -410,10 +410,10 @@ VLM_TEST_SETTINGS = {
"h2oai/h2ovl-mississippi-2b",
],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", # noqa: E501
prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
"stop_sign": "<image>\nWhat's the content in the center of the image?",
"cherry_blossom": "<image>\nWhat is the season?",
}
),
......@@ -444,7 +444,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
"stop_sign": "<image>\nWhat's the content in the center of the image?",
"cherry_blossom": "<image>\nWhat is the season?",
}
),
......@@ -529,7 +529,7 @@ VLM_TEST_SETTINGS = {
max_model_len=16384,
hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
), # noqa: E501
),
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[
......@@ -583,7 +583,7 @@ VLM_TEST_SETTINGS = {
max_num_seqs=2,
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
["<|im_end|>", "<|endoftext|>"]
), # noqa: E501
),
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
# FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
......@@ -598,7 +598,7 @@ VLM_TEST_SETTINGS = {
max_num_seqs=2,
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
["<|im_end|>", "<|endoftext|>"]
), # noqa: E501
),
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
),
......@@ -627,7 +627,7 @@ VLM_TEST_SETTINGS = {
models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n",
max_model_len=4096,
max_num_seqs=2,
dtype="half",
......@@ -640,7 +640,7 @@ VLM_TEST_SETTINGS = {
models=["AIDC-AI/Ovis2-1B"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n",
max_model_len=4096,
max_num_seqs=2,
dtype="half",
......@@ -652,7 +652,7 @@ VLM_TEST_SETTINGS = {
models=["AIDC-AI/Ovis2.5-2B"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<image>\n",
video_idx_to_prompt=lambda idx: "<video>\n",
max_model_len=4096,
max_num_seqs=2,
......@@ -701,8 +701,8 @@ VLM_TEST_SETTINGS = {
models=["Qwen/Qwen2-VL-2B-Instruct"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
......@@ -717,11 +717,11 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"<|begin▁of▁sentence|><|User|>\n{img_prompt}<|Assistant|><think>\n", # noqa: E501
single_image_prompts=IMAGE_ASSETS.prompts(
{
"stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
"stop_sign": "<image>\nWhat's the content in the center of the image?",
"cherry_blossom": "<image>\nWhat is the season?",
}
),
multi_image_prompt="<image>\n<image>\nDescribe the two images in short.", # noqa: E501
multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",
max_model_len=4096,
use_tokenizer_eos=True,
patch_hf_runner=model_utils.skyworkr1v_patch_hf_runner,
......@@ -754,8 +754,8 @@ VLM_TEST_SETTINGS = {
VLMTestType.VIDEO,
),
prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
......@@ -816,7 +816,7 @@ VLM_TEST_SETTINGS = {
auto_cls=AutoModelForImageTextToText,
hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
), # noqa: E501
),
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[
CustomTestOptions(
......
......@@ -170,7 +170,7 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
],
{
"type": "text",
"text": f"What's happening in these {len(audio_assets)} audio clips?",
"text": f"What's happening in these {len(audio_assets)} audio clips?", # noqa: E501
},
],
}
......
......@@ -101,16 +101,11 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
return audio_dict
audio_chunks = [asset_to_chunk(asset) for asset in audio_assets]
text = f"What's happening in these {len(audio_assets)} audio clips?"
messages = [
{
"role": "user",
"content": [
*audio_chunks,
{
"type": "text",
"text": f"What's happening in these {len(audio_assets)} audio clips?",
},
],
"content": [*audio_chunks, {"type": "text", "text": text}],
}
]
......
......@@ -102,8 +102,8 @@ def multi_video_multi_aspect_ratio_inputs(
def different_patch_input_cases_internvl():
images = [asset.pil_image.resize((896, 896)) for asset in IMAGE_ASSETS]
formatter = (
lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n"
) # noqa: E501
lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n" # noqa: E501
)
single_img_prompts = [
"<image>\nWhat's the content in the center of the image?",
"<image>\nWhat is the season?",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment