Fix per file ruff ignores related to line length (#26262)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Fix per file ruff ignores related to line length (#26262)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6c046382 · Harry Mellor · GitHub · 91ac7f76 · 6c046382 · 6c046382
Unverified commit 6c046382, authored Oct 06, 2025 by Harry Mellor; committed by GitHub Oct 06, 2025.
20 changed files
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -947,7 +947,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {
                        "type": "text",
-                        "text": "What's in <|image_1|> and how does it compare to the other one?",  # noqa: E501
+                        "text": "What's in <|image_1|> and how does it compare to "
+                        "the other one?",
                    },
                ],
            }
@@ -960,8 +961,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
    assert conversation == [
        {
            "role": "user",
-            "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to the "
+            "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to "
-            "other one?",
+            "the other one?",
        }
    ]
    _assert_mm_data_is_image_input(mm_data, 2)
@@ -1364,7 +1365,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave(
    _assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None])
-def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave(  # noqa: E501
+def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave(
    phi3v_model_config_mm_interleaved,
    phi3v_tokenizer,
    image_url,
@@ -1451,14 +1452,14 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
    assert conversation == [
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",  # noqa: E501
+            "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
        },
        {"role": "assistant", "content": "Some stuff."},
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
+            "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
        },
    ]
@@ -1468,7 +1469,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[None])
-def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave(  # noqa: E501
+def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave(
    qwen25omni_model_config_mm_interleaved,
    qwen25omni_tokenizer,
    image_url,
@@ -1521,14 +1522,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl
    assert conversation == [
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",  # noqa: E501
+            "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
        },
        {"role": "assistant", "content": "Some stuff."},
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
+            "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
        },
    ]
@@ -1593,14 +1594,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes
    assert conversation == [
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",  # noqa: E501
+            "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
        },
        {"role": "assistant", "content": "Some stuff."},
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
+            "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
        },
    ]
@@ -1661,14 +1662,14 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message
    assert conversation == [
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",  # noqa: E501
+            "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>",
        },
        {"role": "assistant", "content": "Some stuff."},
        {
            "role": "user",
-            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n"
+            "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>"
-            "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
+            "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>",
        },
    ]
@@ -2193,7 +2194,8 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
    assert conversation == [
        {
            "role": "user",
-            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the "
+            "audio say?",
        }
    ]
    _assert_mm_data_inputs(mm_data, {"audio": 1})
@@ -2228,7 +2230,8 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
    assert conversation == [
        {
            "role": "user",
-            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the "
+            "audio say?",
        }
    ]
    _assert_mm_data_inputs(await mm_future, {"audio": 1})

--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -165,7 +165,7 @@ def test_env(
                            # FlashMLA only supports block_size == 64
                            pytest.skip("FlashMLA only supports block_size 64")
                        else:
-                            from vllm.v1.attention.backends.mla.flashmla import (  # noqa: E501
+                            from vllm.v1.attention.backends.mla.flashmla import (
                                is_flashmla_supported,
                            )

--- a/tests/kernels/moe/modular_kernel_tools/common.py
+++ b/tests/kernels/moe/modular_kernel_tools/common.py
@@ -331,7 +331,8 @@ class WeightTensors:
            in_dtype=config.dtype,
            quant_dtype=config.quant_dtype,
            block_shape=config.quant_block_shape,
-            per_out_ch_quant=config.is_per_act_token_quant,  # or config.is_per_out_ch_quant
+            # or config.is_per_out_ch_quant
+            per_out_ch_quant=config.is_per_act_token_quant,
        )
        return WeightTensors(
            w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs

--- a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py
+++ b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py
@@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str):
    results_df: Optional[pd.DataFrame] = None
    for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm(
        combinations
-    ):  # noqa: E501
+    ):
        config = Config(
            Ms=[m],
            K=k,

--- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py
+++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py
@@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
    BatchedDeepGemmExperts,
 )
-from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (  # noqa: E501
+from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import (
    BatchedTritonOrDeepGemmExperts,
 )
 from vllm.model_executor.layers.fused_moe.config import (
@@ -196,10 +196,10 @@ register_experts(
 # Disable on blackwell for now
 if has_deep_ep() and not current_platform.has_device_capability(100):
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
        DeepEPHTPrepareAndFinalize,
    )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
        DeepEPLLPrepareAndFinalize,
    )
@@ -233,7 +233,7 @@ if has_pplx():
    )
 if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100):
-    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
        FlashInferExperts,
    )
    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (  # noqa: E501

--- a/tests/kernels/moe/parallel_utils.py
+++ b/tests/kernels/moe/parallel_utils.py
@@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec
 from vllm.utils import get_open_port, has_deep_ep
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
        DeepEPHTPrepareAndFinalize,
    )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
        DeepEPLLPrepareAndFinalize,
    )

--- a/tests/kernels/moe/test_deepep_deepgemm_moe.py
+++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py
@@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch
 from .utils import make_test_weights
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
        DeepEPHTPrepareAndFinalize,
    )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
        DeepEPLLPrepareAndFinalize,
    )

--- a/tests/kernels/moe/test_deepep_moe.py
+++ b/tests/kernels/moe/test_deepep_moe.py
@@ -28,10 +28,10 @@ from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 if has_deep_ep():
-    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import (
        DeepEPHTPrepareAndFinalize,
    )
-    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (  # noqa: E501
+    from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import (
        DeepEPLLPrepareAndFinalize,
    )

--- a/tests/kernels/moe/test_modular_kernel_combinations.py
+++ b/tests/kernels/moe/test_modular_kernel_combinations.py
@@ -271,7 +271,7 @@ if __name__ == "__main__":
    parser = make_config_arg_parser(
        description=(
            "Run single prepare-finalize & fused-experts combination test"
-            "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "  # noqa: E501
+            "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations "
            "--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts"
        )
    )

--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -483,8 +483,8 @@ def test_mixtral_moe(
    }
    if use_rocm_aiter:
-        # The values of rtol and atol are set based on the tests in ROCM AITER package. # noqa: E501
+        # The values of rtol and atol are set based on the tests in ROCM AITER package.
-        # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174  # noqa: E501
+        # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174
        torch.testing.assert_close(
            hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100
        )

--- a/tests/kernels/moe/test_mxfp4_moe.py
+++ b/tests/kernels/moe/test_mxfp4_moe.py
@@ -10,11 +10,11 @@ import pytest
 import torch
 from packaging import version
-from vllm.model_executor.layers.quantization.quark.quark import (  # noqa: E501
+from vllm.model_executor.layers.quantization.quark.quark import (
    QuarkLinearMethod,
    QuarkW4A4MXFP4,
 )
-from vllm.model_executor.layers.quantization.quark.quark_moe import (  # noqa: E501
+from vllm.model_executor.layers.quantization.quark.quark_moe import (
    QuarkW4A4MXFp4MoEMethod,
 )
 from vllm.platforms import current_platform

--- a/tests/lora/test_chatglm3_tp.py
+++ b/tests/lora/test_chatglm3_tp.py
@@ -12,7 +12,7 @@ PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example
 EXPECTED_LORA_OUTPUT = [
    "SELECT count(*) FROM singer",
-    "SELECT avg(age) ,  min(age) ,  max(age) FROM singer WHERE country  =  'France'",  # noqa: E501
+    "SELECT avg(age) ,  min(age) ,  max(age) FROM singer WHERE country  =  'France'",
    "SELECT name ,  country ,  age FROM singer ORDER BY age",
 ]
@@ -21,10 +21,16 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
    prompts = [
        PROMPT_TEMPLATE.format(query="How many singers do we have?"),
        PROMPT_TEMPLATE.format(
-            query="What is the average, minimum, and maximum age of all singers from France?"  # noqa: E501
+            query=(
+                "What is the average, minimum, and maximum "
+                "age of all singers from France?"
+            )
        ),
        PROMPT_TEMPLATE.format(
-            query="Show name, country, age for all singers ordered by age from the oldest to the youngest."  # noqa: E501
+            query=(
+                "Show name, country, age for all singers ordered "
+                "by age from the oldest to the youngest."
+            )
        ),
    ]
    sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32)

--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -15,10 +15,10 @@ MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 EXPECTED_LORA_OUTPUT = [
    "  SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ",  # noqa: E501
-    "  SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ",  # noqa: E501
+    "  SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ",
    "  SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ",  # noqa: E501
    "  SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ",  # noqa: E501
-    "  SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ",  # noqa: E501
+    "  SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ",
    "  SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' ",  # noqa: E501
 ]

--- a/tests/lora/test_llm_with_multi_loras.py
+++ b/tests/lora/test_llm_with_multi_loras.py
@@ -26,7 +26,7 @@ LORA_RANK = 8
 LORA_TEST_PROMPTS = ["What is GitHub?", "Hi, tell me about you"]
 LORA_TEST_EXPECTED = [
    "GitHub is an open-source platform that provides a way to manage and develop software projects. It allows developers to store and manage code, collaborate on projects, and automate tasks.",  # noqa: E501
-    "I am Alice, an AI assistant developed by GitHub/Charent.",  # noqa: E501
+    "I am Alice, an AI assistant developed by GitHub/Charent.",
 ]

--- a/tests/models/language/generation/test_gemma.py
+++ b/tests/models/language/generation/test_gemma.py
@@ -16,7 +16,7 @@ def test_dummy_loader(vllm_runner, monkeypatch, model: str) -> None:
        ) as llm:
            if model == "google/gemma-3-4b-it":
                normalizers = llm.llm.collective_rpc(
-                    lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item()
+                    lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item()  # noqa: E501
                )
                config = llm.llm.llm_engine.model_config.hf_config.text_config
            else:

--- a/tests/models/language/generation/test_mistral.py
+++ b/tests/models/language/generation/test_mistral.py
@@ -46,12 +46,13 @@ TOOLS = [
                "properties": {
                    "city": {
                        "type": "string",
-                        "description": "The city to find the weather for, e.g. 'San Francisco'",
+                        "description": "The city to find the weather for, e.g. "
+                        "'San Francisco'",
                    },
                    "state": {
                        "type": "string",
-                        "description": "the two-letter abbreviation for the state that the city is"
+                        "description": "the two-letter abbreviation for the state that "
-                        " in, e.g. 'CA' which would mean 'California'",
+                        "the city is in, e.g. 'CA' which would mean 'California'",
                    },
                    "unit": {
                        "type": "string",
@@ -85,7 +86,8 @@ MSGS = [
    {"role": "system", "content": "You are an assistant."},
    {
        "role": "user",
-        "content": "Could you please rewrite the below article? \n\n My English needs improvving, maybe I make errors.",  # noqa
+        "content": "Could you please rewrite the below article? \n\n My English needs "
+        "improvving, maybe I make errors.",
    },
    {
        "role": "assistant",
@@ -96,14 +98,16 @@ MSGS = [
                "type": "function",
                "function": {
                    "name": "rewrite",
-                    "arguments": '{"text":"My English needs improvving, maybe I make errors."}',  # noqa
+                    "arguments": '{"text":"My English needs improvving, maybe '
+                    'I make errors."}',
                },
            }
        ],
    },
    {
        "role": "tool",
-        "content": '{"action":"rewrite","outcome":"My English needs improving, maybe I make errors."}',  # noqa
+        "content": '{"action":"rewrite","outcome":"My English needs improving, maybe '
+        'I make errors."}',
        "tool_call_id": "bbc5b7ede",
        "name": "rewrite",
    },

--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -130,14 +130,14 @@ VLM_TEST_SETTINGS = {
        dtype="bfloat16",
        marks=[
            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
-        ],  # noqa: E501
+        ],
    ),
    "qwen2_5_vl": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -149,8 +149,8 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2.5-Omni-3B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",
-        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",
        max_model_len=4096,
        max_num_seqs=2,
        num_logprobs=6 if current_platform.is_cpu() else 5,
@@ -181,7 +181,7 @@ VLM_TEST_SETTINGS = {
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        image_size_factors=[(0.25, 0.5, 1.0)],
@@ -213,7 +213,7 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -237,10 +237,10 @@ VLM_TEST_SETTINGS = {
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<vlm_image>Please describe the image shortly.",
-                "cherry_blossom": "<vlm_image>Please infer the season with reason.",  # noqa: E501
+                "cherry_blossom": "<vlm_image>Please infer the season with reason.",
            }
        ),
-        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",  # noqa: E501
+        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",
        stop_str=["<|im_end|>"],
        image_size_factors=[(0.10, 0.15)],
        max_tokens=64,
@@ -252,11 +252,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>What's the content in the center of the image?",
-                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+                "cherry_blossom": "<image>What is the season?",
            }
        ),
-        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -268,11 +268,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>What's the content in the center of the image?",
-                "cherry_blossom": "<image>What is the season?",  # noqa: E501
+                "cherry_blossom": "<image>What is the season?",
            }
        ),
-        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -311,14 +311,14 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
            }
        ),
        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
        patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
        hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
-        stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],  # noqa: E501
+        stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],
        image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
    ),
    "fuyu": VLMTestInfo(
@@ -342,7 +342,7 @@ VLM_TEST_SETTINGS = {
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
-                "cherry_blossom": "<start_of_image>What is the season?",  # noqa: E501
+                "cherry_blossom": "<start_of_image>What is the season?",
            }
        ),
        multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.",  # noqa: E501
@@ -356,7 +356,7 @@ VLM_TEST_SETTINGS = {
    "glm4v": VLMTestInfo(
        models=["zai-org/glm-4v-9b"],
        test_type=VLMTestType.IMAGE,
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
@@ -377,9 +377,9 @@ VLM_TEST_SETTINGS = {
    "glm4_1v": VLMTestInfo(
        models=["zai-org/GLM-4.1V-9B-Thinking"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
-        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
-        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
        max_model_len=2048,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [151329, 151336, 151338],
@@ -410,10 +410,10 @@ VLM_TEST_SETTINGS = {
            "h2oai/h2ovl-mississippi-2b",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
@@ -444,7 +444,7 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
@@ -529,7 +529,7 @@ VLM_TEST_SETTINGS = {
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
@@ -583,7 +583,7 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
-        ),  # noqa: E501
+        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
        # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
@@ -598,7 +598,7 @@ VLM_TEST_SETTINGS = {
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
-        ),  # noqa: E501
+        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
    ),
@@ -627,7 +627,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
@@ -640,7 +640,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis2-1B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
@@ -652,7 +652,7 @@ VLM_TEST_SETTINGS = {
        models=["AIDC-AI/Ovis2.5-2B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>\n",
        video_idx_to_prompt=lambda idx: "<video>\n",
        max_model_len=4096,
        max_num_seqs=2,
@@ -701,8 +701,8 @@ VLM_TEST_SETTINGS = {
        models=["Qwen/Qwen2-VL-2B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
@@ -717,11 +717,11 @@ VLM_TEST_SETTINGS = {
        prompt_formatter=lambda img_prompt: f"<｜begin▁of▁sentence｜><｜User｜>\n{img_prompt}<｜Assistant｜><think>\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
-                "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
-        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",  # noqa: E501
+        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.skyworkr1v_patch_hf_runner,
@@ -754,8 +754,8 @@ VLM_TEST_SETTINGS = {
            VLMTestType.VIDEO,
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -816,7 +816,7 @@ VLM_TEST_SETTINGS = {
        auto_cls=AutoModelForImageTextToText,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-        ),  # noqa: E501
+        ),
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(

--- a/tests/models/multimodal/generation/test_ultravox.py
+++ b/tests/models/multimodal/generation/test_ultravox.py
@@ -170,7 +170,7 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
                ],
                {
                    "type": "text",
-                    "text": f"What's happening in these {len(audio_assets)} audio clips?",
+                    "text": f"What's happening in these {len(audio_assets)} audio clips?",  # noqa: E501
                },
            ],
        }

--- a/tests/models/multimodal/generation/test_voxtral.py
+++ b/tests/models/multimodal/generation/test_voxtral.py
@@ -101,16 +101,11 @@ async def test_online_serving(client, audio_assets: AudioTestAssets):
        return audio_dict
    audio_chunks = [asset_to_chunk(asset) for asset in audio_assets]
+    text = f"What's happening in these {len(audio_assets)} audio clips?"
    messages = [
        {
            "role": "user",
-            "content": [
+            "content": [*audio_chunks, {"type": "text", "text": text}],
-                *audio_chunks,
-                {
-                    "type": "text",
-                    "text": f"What's happening in these {len(audio_assets)} audio clips?",
-                },
-            ],
        }
    ]

--- a/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
+++ b/tests/models/multimodal/generation/vlm_utils/custom_inputs.py
@@ -102,8 +102,8 @@ def multi_video_multi_aspect_ratio_inputs(
 def different_patch_input_cases_internvl():
    images = [asset.pil_image.resize((896, 896)) for asset in IMAGE_ASSETS]
    formatter = (
-        lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n"
+        lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n"  # noqa: E501
-    )  # noqa: E501
+    )
    single_img_prompts = [
        "<image>\nWhat's the content in the center of the image?",
        "<image>\nWhat is the season?",