Fix per-file ruff ignores related to line length (#26262)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Fix per-file ruff ignores related to line length (#26262)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6c046382 · Harry Mellor · GitHub · 91ac7f76 · 6c046382 · 6c046382
Unverified commit 6c046382, authored Oct 06, 2025 by Harry Mellor; committed by GitHub on Oct 06, 2025.
20 changed files
--- a/benchmarks/benchmark_ngram_proposer.py
+++ b/benchmarks/benchmark_ngram_proposer.py
@@ -164,7 +164,7 @@ def invoke_main() -> None:
    )
    parser.add_argument(
        "--batched", action="store_true", help="consider time to prepare batch"
-    )  # noqa: E501
+    )
    parser.add_argument(
        "--num-iteration",
        type=int,

--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -909,13 +909,13 @@ def create_argument_parser():
    parser.add_argument(
        "--tokenizer",
        type=str,
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
    )
    parser.add_argument(
        "--tokenizer-mode",
        type=str,
        default="auto",
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
    )
    parser.add_argument(
        "--num-prompts",

--- a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
+++ b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
@@ -72,8 +72,8 @@ VLLMKernelScheduleTag: dict[
 ] = {
    **KernelScheduleTag,  # type: ignore
    **{
-        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
-        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
-        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
+        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
    },
 }
--- a/examples/offline_inference/vision_language_pooling.py
+++ b/examples/offline_inference/vision_language_pooling.py
@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
 def _get_vlm2vec_prompt_image(query: Query, image_token: str):
    if query["modality"] == "text":
        text = query["text"]
-        prompt = f"Find me an everyday image that matches the given caption: {text}"  # noqa: E501
+        prompt = f"Find me an everyday image that matches the given caption: {text}"
        image = None
    elif query["modality"] == "image":
        prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image."  # noqa: E501

--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
@@ -203,9 +203,9 @@ class Proxy:
                async with session.post(
                    url=url, json=data, headers=headers
                ) as response:
-                    if 200 <= response.status < 300 or 400 <= response.status < 500:  # noqa: E501
+                    if 200 <= response.status < 300 or 400 <= response.status < 500:
                        if use_chunked:
-                            async for chunk_bytes in response.content.iter_chunked(  # noqa: E501
+                            async for chunk_bytes in response.content.iter_chunked(
                                1024
                            ):
                                yield chunk_bytes

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,52 +56,6 @@ include = ["vllm*"]
 "vllm/third_party/**" = ["ALL"]
 "vllm/version.py" = ["F401"]
 "vllm/_version.py" = ["ALL"]
-# TEMPORARY! These ignores will be fixed forward
-## Line length violations
-"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
-"tests/compile/piecewise/test_simple.py" = ["E501"]
-"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
-"tests/entrypoints/conftest.py" = ["E501"]
-"tests/entrypoints/openai/test_audio.py" = ["E501"]
-"tests/entrypoints/openai/test_chat.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
-"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
-"tests/entrypoints/openai/test_video.py" = ["E501"]
-"tests/entrypoints/openai/test_vision.py" = ["E501"]
-"tests/entrypoints/test_chat_utils.py" = ["E501"]
-"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
-"tests/models/language/generation/test_gemma.py" = ["E501"]
-"tests/models/language/generation/test_mistral.py" = ["E501"]
-"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
-"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
-"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
-"tests/tool_use/test_tool_choice_required.py" = ["E501"]
-"tests/v1/attention/utils.py" = ["E501"]
-"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
-"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
-"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
-"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
-"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
-"vllm/compilation/collective_fusion.py" = ["E501"]
-"vllm/compilation/wrapper.py" = ["E501"]
-"vllm/config/vllm.py" = ["E501"]
-"vllm/distributed/device_communicators/all2all.py" = ["E501"]
-"vllm/entrypoints/openai/protocol.py" = ["E501"]
-"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
-"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
-"vllm/model_executor/models/bailing_moe.py" = ["E501"]
-"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
-"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
-"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
-"vllm/model_executor/models/phi4mm.py" = ["E501"]
-"vllm/model_executor/models/qwen3_next.py" = ["E501"]
-"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
-"vllm/v1/attention/backends/mla/common.py" = ["E501"]
-"vllm/v1/engine/utils.py" = ["E501"]
-"vllm/v1/utils.py" = ["E501"]
-"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
-# End of temporary ignores

 [tool.ruff.lint]
 select = [

--- a/tests/compile/piecewise/test_simple.py
+++ b/tests/compile/piecewise/test_simple.py
@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
        splitting_ops=["silly.attention"],
        use_inductor_graph_partition=False,
        use_inductor=use_inductor,
-        expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-        expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # 2 * num_layers + 1
+        expected_num_piecewise_graphs_seen=5,
+        # 1 + num_layers
+        expected_num_piecewise_capturable_graphs_seen=3,
+        # num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,
    )


@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")

    _run_simple_model(
-        # inductor graph partition automatically resets splitting_ops
-        # to be an empty list
+        # Inductor graph partition automatically resets splitting_ops to an empty list
        splitting_ops=splitting_ops,
        use_inductor_graph_partition=True,
        use_inductor=True,
-        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
-        # graph, same as fx graph partition.
+        # Since not splitting at fx graph level
+        expected_num_piecewise_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_backend_compilations=1,
+        # Inductor graph partition still captures 6 graph, same as fx graph partition
+        expected_num_cudagraph_captured=6,
    )
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
        kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}

    with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # One graph for the model
+        num_graphs_seen=1,
        num_piecewise_graphs_seen=1,
        num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
        **kwargs,
    ):
        outputs.append(
@@ -478,9 +481,10 @@ def benchmark():
                # it is fine here, because we only use the lambda function once.
                runtime = do_bench(
                    lambda: graphs[b][0](  # noqa
-                        input_ids[:b], positions[:b]
+                        input_ids[:b],  # noqa
+                        positions[:b],  # noqa
+                    )
                )
-                )  # noqa
                piecewise_cudagraph_time[b] = runtime
            else:
                runtime = do_bench(lambda: graphs[b][0].replay())  # noqa

--- a/tests/compile/test_functionalization.py
+++ b/tests/compile/test_functionalization.py
@@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool):
    # check if the functionalization pass is applied
    for op in model.ops_in_model(do_fusion):
        find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
-        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None  # noqa: E501
+        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None

    # make sure the ops were all de-functionalized
    found = dict()

--- a/tests/compile/test_fusion_attn.py
+++ b/tests/compile/test_fusion_attn.py
@@ -565,7 +565,7 @@ def test_attention_quant_pattern(
    elif quant_key.dtype == FP4_DTYPE:
        assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, (
            "Attention should have output_block_scale after FP4 fusion"
-        )  # noqa: E501
+        )

    # Check that results are close
    torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2)
--- a/tests/compile/test_sequence_parallelism.py
+++ b/tests/compile/test_sequence_parallelism.py
@@ -186,7 +186,7 @@ class TestQuantModel(torch.nn.Module):
        ):
            # If fusion happens, the fused op is the one
            # we check for (de)functionalization
-            return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default]  # noqa: E501
+            return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default]
        else:
            # If no fusion, the original ops are checked
            return [
@@ -322,7 +322,7 @@ def sequence_parallelism_pass_on_test_model(
    # check if the functionalization pass is applied
    for op in model.ops_in_model():
        find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
-        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None  # noqa: E501
+        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None

    # make sure the ops were all de-functionalized
    found = dict()

--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -104,7 +104,7 @@ TEXT_GENERATION_MODELS = {
    # [Decoder-only]
    # Uses Llama
    # "BAAI/AquilaChat-7B": PPTestSettings.fast(),
-    "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"),  # noqa: E501
+    "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"),
    "baichuan-inc/Baichuan-7B": PPTestSettings.fast(),
    "baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(),
    "bigscience/bloomz-1b1": PPTestSettings.fast(),
@@ -138,7 +138,7 @@ TEXT_GENERATION_MODELS = {
    # Uses Llama
    # "mistralai/Mistral-7B-Instruct-v0.1": PPTestSettings.fast(),
    "state-spaces/mamba-130m-hf": PPTestSettings.fast(),
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"),  # noqa: E501
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"),
    "mosaicml/mpt-7b": PPTestSettings.fast(),
    "nvidia/Minitron-8B-Base": PPTestSettings.fast(),
    "allenai/OLMo-1B-hf": PPTestSettings.fast(),
@@ -151,13 +151,13 @@ TEXT_GENERATION_MODELS = {
    "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(
        multi_node_only=True, load_format="dummy"
-    ),  # noqa: E501
+    ),
    "Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
    "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
    "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
    "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(),
    "bigcode/starcoder2-3b": PPTestSettings.fast(),
-    "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"),  # noqa: E501
+    "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"),
    # FIXME: Cannot load tokenizer in latest transformers version.
    # Need to use tokenizer from `meta-llama/Llama-2-7b-chat-hf`
    # "xverse/XVERSE-7B-Chat": PPTestSettings.fast(),

--- a/tests/entrypoints/conftest.py
+++ b/tests/entrypoints/conftest.py
@@ -83,7 +83,8 @@ def sample_complex_json_schema():
                "type": "array",
                "items": {
                    "type": "string",
-                    "pattern": "^[a-z]{1,10}$",  # Combining length and pattern restrictions
+                    # Combining length and pattern restrictions
+                    "pattern": "^[a-z]{1,10}$",
                },
            },
        },

--- a/tests/entrypoints/openai/test_audio.py
+++ b/tests/entrypoints/openai/test_audio.py
@@ -145,7 +145,7 @@ async def test_single_chat_session_audio_base64encoded(
                {
                    "type": "audio_url",
                    "audio_url": {
-                        "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}"
+                        "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}"  # noqa: E501
                    },
                },
                {"type": "text", "text": "What's happening in this audio?"},

--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -835,17 +835,18 @@ async def test_extra_fields_allowed(client: openai.AsyncOpenAI):

 @pytest.mark.asyncio
 async def test_complex_message_content(client: openai.AsyncOpenAI):
+    content = [
+        {
+            "type": "text",
+            "text": "what is 1+1? please provide the result without any other text.",
+        }
+    ]
    resp = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {
                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": "what is 1+1? please provide the result without any other text.",
-                    }
-                ],
+                "content": content,
            }
        ],
        temperature=0,

--- a/tests/entrypoints/openai/test_chat_template.py
+++ b/tests/entrypoints/openai/test_chat_template.py
@@ -76,8 +76,8 @@ def test_load_chat_template():
    assert (
        template_content
        == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
-{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}"""
-    )  # noqa: E501
+{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}"""  # noqa: E501
+    )


 def test_no_load_chat_template_filelike():

--- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
@@ -45,12 +45,13 @@ TOOLS = [
                "properties": {
                    "city": {
                        "type": "string",
-                        "description": "The city to find the weather for, e.g. 'San Francisco'",
+                        "description": "The city to find the weather for, e.g. "
+                        "'San Francisco'",
                    },
                    "state": {
                        "type": "string",
-                        "description": "the two-letter abbreviation for the state that the city is"
-                        " in, e.g. 'CA' which would mean 'California'",
+                        "description": "the two-letter abbreviation for the state that "
+                        "the city is in, e.g. 'CA' which would mean 'California'",
                    },
                    "unit": {
                        "type": "string",
@@ -69,7 +70,8 @@ MESSAGES = [
    {"role": "assistant", "content": "I'm doing well! How can I help you?"},
    {
        "role": "user",
-        "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?",
+        "content": "Can you tell me what the temperate will be in Dallas, "
+        "in fahrenheit?",
    },
 ]


--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -25,12 +25,14 @@ tools = [
                "properties": {
                    "city": {
                        "type": "string",
-                        "description": "The city to find the weather for, e.g. 'Vienna'",
+                        "description": "The city to find the weather for, e.g. "
+                        "'Vienna'",
                        "default": "Vienna",
                    },
                    "country": {
                        "type": "string",
-                        "description": "The country that the city is in, e.g. 'Austria'",
+                        "description": "The country that the city is in, e.g. "
+                        "'Austria'",
                    },
                    "unit": {
                        "type": "string",
@@ -85,12 +87,14 @@ tools = [
                "properties": {
                    "city": {
                        "type": "string",
-                        "description": "The city to get the forecast for, e.g. 'Vienna'",
+                        "description": "The city to get the forecast for, e.g. "
+                        "'Vienna'",
                        "default": "Vienna",
                    },
                    "country": {
                        "type": "string",
-                        "description": "The country that the city is in, e.g. 'Austria'",
+                        "description": "The country that the city is in, e.g. "
+                        "'Austria'",
                    },
                    "days": {
                        "type": "integer",

--- a/tests/entrypoints/openai/test_video.py
+++ b/tests/entrypoints/openai/test_video.py
@@ -179,7 +179,7 @@ async def test_single_chat_session_video_base64encoded(
                {
                    "type": "video_url",
                    "video_url": {
-                        "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"
+                        "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"  # noqa: E501
                    },
                },
                {"type": "text", "text": "What's in this video?"},
@@ -238,7 +238,7 @@ async def test_single_chat_session_video_base64encoded_beamsearch(
                {
                    "type": "video_url",
                    "video_url": {
-                        "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"
+                        "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}"  # noqa: E501
                    },
                },
                {"type": "text", "text": "What's in this video?"},

--- a/tests/entrypoints/openai/test_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -233,7 +233,7 @@ async def test_single_chat_session_image_base64encoded(
                {
                    "type": "image_url",
                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
+                        "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"  # noqa: E501
                    },
                },
                {"type": "text", "text": content_text},
@@ -300,7 +300,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
                {
                    "type": "image_url",
                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
+                        "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"  # noqa: E501
                    },
                },
                {"type": "text", "text": "What's in this image?"},