Add flake8-implicit-str-concat rules to Ruff (#33191)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Add flake8-implicit-str-concat rules to Ruff (#33191)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2eb673a0 · Harry Mellor · GitHub · a97b5e20 · 2eb673a0 · 2eb673a0
Unverified commit 2eb673a0, authored Jan 28, 2026 by Harry Mellor; committed by GitHub on Jan 28, 2026.
16 changed files
--- a/.buildkite/performance-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/performance-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -393,7 +393,7 @@ if __name__ == "__main__":
    with open(results_folder / md_file, "w") as f:
        results = read_markdown(
            "../.buildkite/performance-benchmarks/"
-            + "performance-benchmarks-descriptions.md"
+            "performance-benchmarks-descriptions.md"
        )
        results = results.format(
            latency_tests_markdown_table=latency_md_table,

--- a/csrc/quantization/machete/generate.py
+++ b/csrc/quantization/machete/generate.py
@@ -288,8 +288,8 @@ def generate_sch_sig(schedule_config: ScheduleConfig) -> str:
    )
    cluster_shape = (
        f"{schedule_config.cluster_shape_mnk[0]}"
-        + f"x{schedule_config.cluster_shape_mnk[1]}"
+        f"x{schedule_config.cluster_shape_mnk[1]}"
-        + f"x{schedule_config.cluster_shape_mnk[2]}"
+        f"x{schedule_config.cluster_shape_mnk[2]}"
    )
    kernel_schedule = VLLMKernelScheduleTag[schedule_config.kernel_schedule].split(
        "::"
@@ -301,7 +301,7 @@ def generate_sch_sig(schedule_config: ScheduleConfig) -> str:
    return (
        f"{tile_shape}_{cluster_shape}_{kernel_schedule}"
-        + f"_{epilogue_schedule}_{tile_scheduler}"
+        f"_{epilogue_schedule}_{tile_scheduler}"
    )

--- a/examples/offline_inference/automatic_prefix_caching.py
+++ b/examples/offline_inference/automatic_prefix_caching.py
@@ -26,7 +26,7 @@ from vllm import LLM, SamplingParams
 # A prompt containing a large markdown table. The table is randomly generated by GPT-4.
 LONG_PROMPT = (
    "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n"
-    + """
+    """
 | ID  | Name          | Age | Occupation    | Country       | Email                  | Phone Number   | Address                       |
 |-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
 | 1   | John Doe      | 29  | Engineer      | USA           | john.doe@example.com   | 555-1234       | 123 Elm St, Springfield, IL  |

--- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
+++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
@@ -69,10 +69,10 @@ class StatsCalculator:
        np_arr = np.array(self._stats)
        output_str = (
            f"\nNum requests: {len(self._stats)}"
-            + "\nPrefill node TTFT stats:"
+            "\nPrefill node TTFT stats:"
-            + f"\n - Average (ms): {np.mean(np_arr)}"
+            f"\n - Average (ms): {np.mean(np_arr)}"
-            + f"\n - Median (ms): {np.median(np_arr)}"
+            f"\n - Median (ms): {np.median(np_arr)}"
-            + f"\n - 99th Percentile (ms): {np.percentile(np_arr, 99)}\n"
+            f"\n - 99th Percentile (ms): {np.percentile(np_arr, 99)}\n"
        )
        print(
            "===============================",

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,8 @@ select = [
    "UP",
    # flake8-bugbear
    "B",
+    # flake8-implicit-str-concat
+    "ISC",
    # flake8-simplify
    "SIM",
    # isort

--- a/tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
+++ b/tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
@@ -193,7 +193,7 @@ TEST_CASES = [
    pytest.param(
        False,
        "<|python_start|>[get_weather(city='LA', metric='C'), "
-        + "register_user(name='Doe', age=9)]",
+        "register_user(name='Doe', age=9)]",
        [
            SIMPLE_FUNCTION_CALL,
            FunctionCall(name="register_user", arguments='{"name": "Doe", "age": 9}'),

--- a/tests/tool_parsers/test_deepseekv31_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv31_tool_parser.py
@@ -24,9 +24,9 @@ def parser(deepseekv31_tokenizer):
 def test_extract_tool_calls_with_tool(parser):
    model_output = (
        "normal text"
-        + "<｜tool▁calls▁begin｜>"
+        "<｜tool▁calls▁begin｜>"
-        + '<｜tool▁call▁begin｜>foo<｜tool▁sep｜>{"x":1}<｜tool▁call▁end｜>'
+        '<｜tool▁call▁begin｜>foo<｜tool▁sep｜>{"x":1}<｜tool▁call▁end｜>'
-        + "<｜tool▁calls▁end｜>"
+        "<｜tool▁calls▁end｜>"
    )
    result = parser.extract_tool_calls(model_output, None)
    assert result.tools_called
@@ -39,11 +39,11 @@ def test_extract_tool_calls_with_tool(parser):
 def test_extract_tool_calls_with_multiple_tools(parser):
    model_output = (
        "some prefix text"
-        + "<｜tool▁calls▁begin｜>"
+        "<｜tool▁calls▁begin｜>"
-        + '<｜tool▁call▁begin｜>foo<｜tool▁sep｜>{"x":1}<｜tool▁call▁end｜>'
+        '<｜tool▁call▁begin｜>foo<｜tool▁sep｜>{"x":1}<｜tool▁call▁end｜>'
-        + '<｜tool▁call▁begin｜>bar<｜tool▁sep｜>{"y":2}<｜tool▁call▁end｜>'
+        '<｜tool▁call▁begin｜>bar<｜tool▁sep｜>{"y":2}<｜tool▁call▁end｜>'
-        + "<｜tool▁calls▁end｜>"
+        "<｜tool▁calls▁end｜>"
-        + " some suffix text"
+        " some suffix text"
    )
    result = parser.extract_tool_calls(model_output, None)

--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1302,7 +1302,7 @@ def prep_prompts(batch_size: int, ln_range: tuple[int, int] = (800, 1100)):
        indices.append(idx)
        prompt = (
            "```python\n# We set a number of variables, "
-            + f"x{idx} will be important later\n"
+            f"x{idx} will be important later\n"
        )
        ln = random.randint(*ln_range)
        for k in range(30, ln):

--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -134,8 +134,7 @@ class BenchmarkDataset(ABC):
                content.append(mm_content)
            else:
                raise TypeError(
-                    "Could not process multimodal content of type: "
+                    f"Could not process multimodal content of type: {type(mm_content)}"
-                    + f"{type(mm_content)}"
                )
        return [{"role": "user", "content": content}]

--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -208,8 +208,8 @@ class TorchCompileWithNoGuardsWrapper:
        if not hasattr(self._compiled_callable, "aot_compile"):
            raise RuntimeError(
                "aot_compile is not supported by the current configuration. "
-                + "Please make sure torch.compile is enabled with the latest "
+                "Please make sure torch.compile is enabled with the latest "
-                + f"version of PyTorch (current using torch: {torch.__version__})"
+                f"version of PyTorch (current using torch: {torch.__version__})"
            )
        return self._compiled_callable.aot_compile((args, kwargs))

--- a/vllm/entrypoints/openai/translations/speech_to_text.py
+++ b/vllm/entrypoints/openai/translations/speech_to_text.py
@@ -406,8 +406,8 @@ class OpenAISpeechToText(OpenAIServing):
        if request.response_format not in ["text", "json", "verbose_json"]:
            return self.create_error_response(
-                ("Currently only support response_format")
+                "Currently only support response_format: "
-                + ("`text`, `json` or `verbose_json`")
+                "`text`, `json` or `verbose_json`"
            )
        if (

--- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py
+++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py
@@ -32,8 +32,8 @@ class AiterInt8ScaledMMLinearKernel(CutlassInt8ScaledMMLinearKernel):
            return (
                False,
                "requires setting `VLLM_ROCM_USE_AITER=1` "
-                + "and `VLLM_ROCM_USE_AITER_LINEAR=1`. "
+                "and `VLLM_ROCM_USE_AITER_LINEAR=1`. "
-                + "`VLLM_ROCM_USE_AITER_LINEAR` default is True.",
+                "`VLLM_ROCM_USE_AITER_LINEAR` default is True.",
            )
        return True, None
@@ -97,9 +97,9 @@ class AiterInt8ScaledMMLinearKernel(CutlassInt8ScaledMMLinearKernel):
            per_token_scale_a and per_channel_scale_b
        ), (
            "Currently only support per-tensor-per-tensor GEMM "
-            + " and per-token-per-channel GEMM through AITER"
+            " and per-token-per-channel GEMM through AITER"
            " w8a8 scaled gemm. `AiterInt8ScaledMMLinearKernel` "
-            + "does not support AITER block scaled GEMM."
+            "does not support AITER block scaled GEMM."
        )
        # gemm_a8w8_CK(a, b, scale_a, scale_b, bias) expects

--- a/vllm/reasoning/olmo3_reasoning_parser.py
+++ b/vllm/reasoning/olmo3_reasoning_parser.py
@@ -234,7 +234,7 @@ class Olmo3ReasoningParser(ReasoningParser):
        # reasoning template.
        reasoning_expr = (
            rf"^(?:{self.think_start})?(?P<reasoning>.*?)"
-            + rf"{self.think_end}(?P<content>.*)$"
+            rf"{self.think_end}(?P<content>.*)$"
        )
        self.reasoning_regex = re.compile(reasoning_expr, re.DOTALL)

--- a/vllm/v1/attention/backends/mla/flashattn_mla.py
+++ b/vllm/v1/attention/backends/mla/flashattn_mla.py
@@ -216,7 +216,7 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata]
            # Ensure the persistent buffer is large enough
            assert n <= self.scheduler_metadata.shape[0], (
                f"Scheduler metadata size {n} exceeds buffer size "
-                + f"{self.scheduler_metadata.shape[0]}"
+                f"{self.scheduler_metadata.shape[0]}"
            )
            self.scheduler_metadata[:n] = scheduler_metadata
            # NOTE(woosuk): We should zero out the rest of the scheduler

--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -646,7 +646,7 @@ class ChunkedLocalAttentionManager(SingleTypeKVCacheManager):
        """
        assert isinstance(kv_cache_spec, ChunkedLocalAttentionSpec), (
            "ChunkedLocalAttentionManager can only be used for "
-            + "chunked local attention groups"
+            "chunked local attention groups"
        )
        assert use_eagle is False, (
            "Hybrid KV cache is not supported for " + "eagle + chunked local attention."

--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -618,7 +618,7 @@ class AsyncLLM(EngineClient):
                except Exception as e2:
                    s = (
                        f"{e.__class__.__name__}: "
-                        + "error during printing an exception of class"
+                        "error during printing an exception of class"
                        + e2.__class__.__name__
                    )
                logger.info("Request %s failed due to %s.", request_id, s)