Remove all references to `yapf` as it's no longer used (#26251)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Remove all references to `yapf` as it's no longer used (#26251)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Commit 4e256cad (signature unverified); other hash shown on the page: d6953beb (presumably the parent commit).
Authored by Harry Mellor on Oct 05, 2025; committed by GitHub on Oct 05, 2025.
20 changed files
--- a/csrc/quantization/machete/generate.py
+++ b/csrc/quantization/machete/generate.py
@@ -12,9 +12,6 @@ from functools import reduce
 from typing import Optional, Union
 import jinja2
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm_cutlass_library_extension import (
    DataType,
    EpilogueScheduleTag,
@@ -31,8 +28,6 @@ from vllm_cutlass_library_extension import (
    VLLMKernelScheduleTag,
 )
-# yapf: enable
 #
 #   Generator templating
 #

--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -21,8 +21,6 @@ from vllm.utils import FlexibleArgumentParser
 logger = logging.getLogger()
-# yapf conflicts with isort for this docstring
-# yapf: disable
 """
 tensorize_vllm_model.py is a script that can be used to serialize and 
 deserialize vLLM models. These models can be loaded using tensorizer 
@@ -132,7 +130,8 @@ def get_parser():
        "can be loaded using tensorizer directly to the GPU "
        "extremely quickly. Tensor encryption and decryption is "
        "also supported, although libsodium must be installed to "
-        "use it.")
+        "use it."
+    )
    parser = EngineArgs.add_cli_args(parser)
    parser.add_argument(
@@ -144,13 +143,14 @@ def get_parser():
        "along with the model by instantiating a TensorizerConfig object, "
        "creating a dict from it with TensorizerConfig.to_serializable(), "
        "and passing it to LoRARequest's initializer with the kwarg "
-        "tensorizer_config_dict."
+        "tensorizer_config_dict.",
    )
-    subparsers = parser.add_subparsers(dest='command', required=True)
+    subparsers = parser.add_subparsers(dest="command", required=True)
    serialize_parser = subparsers.add_parser(
-        'serialize', help="Serialize a model to `--serialized-directory`")
+        "serialize", help="Serialize a model to `--serialized-directory`"
+    )
    serialize_parser.add_argument(
        "--suffix",
@@ -163,7 +163,9 @@ def get_parser():
            "`--suffix` is `v1`, the serialized model tensors will be "
            "saved to "
            "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
-            "If none is provided, a random UUID will be used."))
+            "If none is provided, a random UUID will be used."
+        ),
+    )
    serialize_parser.add_argument(
        "--serialized-directory",
        type=str,
@@ -175,33 +177,44 @@ def get_parser():
        "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
        "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
        "where `suffix` is given by `--suffix` or a random UUID if not "
-        "provided.")
+        "provided.",
+    )
    serialize_parser.add_argument(
        "--serialization-kwargs",
        type=tensorizer_kwargs_arg,
        required=False,
-        help=("A JSON string containing additional keyword arguments to "
+        help=(
+            "A JSON string containing additional keyword arguments to "
            "pass to Tensorizer's TensorSerializer during "
-              "serialization."))
+            "serialization."
+        ),
+    )
    serialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
-        help=("Encrypt the model weights with a randomly-generated binary key,"
+        help=(
-              " and save the key at this path"))
+            "Encrypt the model weights with a randomly-generated binary key,"
+            " and save the key at this path"
+        ),
+    )
    deserialize_parser = subparsers.add_parser(
-        'deserialize',
+        "deserialize",
-        help=("Deserialize a model from `--path-to-tensors`"
+        help=(
-              " to verify it can be loaded and used."))
+            "Deserialize a model from `--path-to-tensors`"
+            " to verify it can be loaded and used."
+        ),
+    )
    deserialize_parser.add_argument(
        "--path-to-tensors",
        type=str,
        required=False,
-        help="The local path or S3 URI to the model tensors to deserialize. ")
+        help="The local path or S3 URI to the model tensors to deserialize. ",
+    )
    deserialize_parser.add_argument(
        "--serialized-directory",
@@ -209,74 +222,82 @@ def get_parser():
        required=False,
        help="Directory with model artifacts for loading. Assumes a "
        "model.tensors file exists therein. Can supersede "
-             "--path-to-tensors.")
+        "--path-to-tensors.",
+    )
    deserialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
-        help=("Path to a binary key to use to decrypt the model weights,"
+        help=(
-              " if the model was serialized with encryption"))
+            "Path to a binary key to use to decrypt the model weights,"
+            " if the model was serialized with encryption"
+        ),
+    )
    deserialize_parser.add_argument(
        "--deserialization-kwargs",
        type=tensorizer_kwargs_arg,
        required=False,
-        help=("A JSON string containing additional keyword arguments to "
+        help=(
+            "A JSON string containing additional keyword arguments to "
            "pass to Tensorizer's `TensorDeserializer` during "
-              "deserialization."))
+            "deserialization."
+        ),
+    )
    TensorizerArgs.add_cli_args(deserialize_parser)
    return parser
-def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
-                                              cfg: TensorizerConfig):
+def merge_extra_config_with_tensorizer_config(extra_cfg: dict, cfg: TensorizerConfig):
    for k, v in extra_cfg.items():
        if hasattr(cfg, k):
            setattr(cfg, k, v)
            logger.info(
                "Updating TensorizerConfig with %s from "
-                "--model-loader-extra-config provided", k
+                "--model-loader-extra-config provided",
+                k,
            )
 def deserialize(args, tensorizer_config):
    if args.lora_path:
        tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
-        llm = LLM(model=args.model,
+        llm = LLM(
+            model=args.model,
            load_format="tensorizer",
            tensor_parallel_size=args.tensor_parallel_size,
            model_loader_extra_config=tensorizer_config,
            enable_lora=True,
        )
        sampling_params = SamplingParams(
-            temperature=0,
+            temperature=0, max_tokens=256, stop=["[/assistant]"]
-            max_tokens=256,
-            stop=["[/assistant]"]
        )
        # Truncating this as the extra text isn't necessary
-        prompts = [
+        prompts = ["[user] Write a SQL query to answer the question based on ..."]
-            "[user] Write a SQL query to answer the question based on ..."
-        ]
        # Test LoRA load
        print(
            llm.generate(
                prompts,
                sampling_params,
-            lora_request=LoRARequest("sql-lora",
+                lora_request=LoRARequest(
+                    "sql-lora",
                    1,
                    args.lora_path,
-                                     tensorizer_config_dict = tensorizer_config
+                    tensorizer_config_dict=tensorizer_config.to_serializable(),
-                                     .to_serializable())
+                ),
            )
        )
    else:
-        llm = LLM(model=args.model,
+        llm = LLM(
+            model=args.model,
            load_format="tensorizer",
            tensor_parallel_size=args.tensor_parallel_size,
-                  model_loader_extra_config=tensorizer_config
+            model_loader_extra_config=tensorizer_config,
        )
    return llm
@@ -285,17 +306,20 @@ def main():
    parser = get_parser()
    args = parser.parse_args()
-    s3_access_key_id = (getattr(args, 's3_access_key_id', None)
+    s3_access_key_id = getattr(args, "s3_access_key_id", None) or os.environ.get(
-                        or os.environ.get("S3_ACCESS_KEY_ID", None))
+        "S3_ACCESS_KEY_ID", None
-    s3_secret_access_key = (getattr(args, 's3_secret_access_key', None)
+    )
-                            or os.environ.get("S3_SECRET_ACCESS_KEY", None))
+    s3_secret_access_key = getattr(
-    s3_endpoint = (getattr(args, 's3_endpoint', None)
+        args, "s3_secret_access_key", None
-                or os.environ.get("S3_ENDPOINT_URL", None))
+    ) or os.environ.get("S3_SECRET_ACCESS_KEY", None)
+    s3_endpoint = getattr(args, "s3_endpoint", None) or os.environ.get(
+        "S3_ENDPOINT_URL", None
+    )
    credentials = {
        "s3_access_key_id": s3_access_key_id,
        "s3_secret_access_key": s3_secret_access_key,
-        "s3_endpoint": s3_endpoint
+        "s3_endpoint": s3_endpoint,
    }
    model_ref = args.model
@@ -309,25 +333,25 @@ def main():
    if args.model_loader_extra_config:
        extra_config = json.loads(args.model_loader_extra_config)
+    tensorizer_dir = args.serialized_directory or extra_config.get("tensorizer_dir")
-    tensorizer_dir = (args.serialized_directory or
+    tensorizer_uri = getattr(args, "path_to_tensors", None) or extra_config.get(
-                      extra_config.get("tensorizer_dir"))
+        "tensorizer_uri"
-    tensorizer_uri = (getattr(args, "path_to_tensors", None)
+    )
-                      or extra_config.get("tensorizer_uri"))
    if tensorizer_dir and tensorizer_uri:
-        parser.error("--serialized-directory and --path-to-tensors "
+        parser.error(
-                     "cannot both be provided")
+            "--serialized-directory and --path-to-tensors cannot both be provided"
+        )
    if not tensorizer_dir and not tensorizer_uri:
-        parser.error("Either --serialized-directory or --path-to-tensors "
+        parser.error(
-                     "must be provided")
+            "Either --serialized-directory or --path-to-tensors must be provided"
+        )
    if args.command == "serialize":
        engine_args = EngineArgs.from_cli_args(args)
-        input_dir = tensorizer_dir.rstrip('/')
+        input_dir = tensorizer_dir.rstrip("/")
        suffix = args.suffix if args.suffix else uuid.uuid4().hex
        base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
        if engine_args.tensor_parallel_size > 1:
@@ -339,15 +363,14 @@ def main():
            tensorizer_uri=model_path,
            encryption_keyfile=keyfile,
            serialization_kwargs=args.serialization_kwargs or {},
-            **credentials
+            **credentials,
        )
        if args.lora_path:
            tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
            tensorize_lora_adapter(args.lora_path, tensorizer_config)
-        merge_extra_config_with_tensorizer_config(extra_config,
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
-                                                  tensorizer_config)
        tensorize_vllm_model(engine_args, tensorizer_config)
    elif args.command == "deserialize":
@@ -356,11 +379,10 @@ def main():
            tensorizer_dir=args.serialized_directory,
            encryption_keyfile=keyfile,
            deserialization_kwargs=args.deserialization_kwargs or {},
-            **credentials
+            **credentials,
        )
-        merge_extra_config_with_tensorizer_config(extra_config,
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
-                                                  tensorizer_config)
        deserialize(args, tensorizer_config)
    else:
        raise ValueError("Either serialize or deserialize must be specified.")

--- a/tests/compile/test_silu_mul_quant_fusion.py
+++ b/tests/compile/test_silu_mul_quant_fusion.py
@@ -8,16 +8,11 @@ import torch
 import vllm.envs as envs
 from tests.kernels.quantization.nvfp4_utils import quant_nvfp4_tensor
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.compilation.activation_quant_fusion import (
    FUSED_OPS,
    SILU_MUL_OP,
    ActivationQuantFusionPass,
 )
-# yapf: enable
 from vllm.compilation.fusion import QUANT_OPS
 from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.compilation.post_cleanup import PostCleanupPass

--- a/tests/distributed/test_expert_parallel.py
+++ b/tests/distributed/test_expert_parallel.py
@@ -107,10 +107,8 @@ class EPTestSettings:
 # NOTE: You can adjust tp_base locally to fit the model in GPU
 # The values displayed here are only a rough indicator of the size of the model
-# yapf: disable
 TEST_MODELS = {
-    "deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast(
+    "deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast(trust_remote_code=True),
-        trust_remote_code=True),
    "mistralai/Mixtral-8x7B-Instruct-v0.1": EPTestSettings.fast(tp_base=4),
 }
@@ -192,22 +190,24 @@ def _compare_tp(
    ]
    try:
-        compare_two_settings(model_name,
+        compare_two_settings(
+            model_name,
            ep_args,
            tp_args,
            ep_env,
            tp_env,
            method=method,
-                             max_wait_seconds=360)
+            max_wait_seconds=360,
+        )
    except Exception:
        raise
 @pytest.mark.parametrize(
-    ("model_name", "parallel_setup", "distributed_backend", "runner",
+    ("model_name", "parallel_setup", "distributed_backend", "runner", "test_options"),
-     "test_options"),
    [
-        params for model_name, settings in TEST_MODELS.items()
+        params
+        for model_name, settings in TEST_MODELS.items()
        for params in settings.iter_params(model_name)
    ],
 )
@@ -220,10 +220,12 @@ def test_ep(
    test_options: EPTestOptions,
    num_gpus_available,
 ):
-    _compare_tp(model_name,
+    _compare_tp(
+        model_name,
        parallel_setup,
        distributed_backend,
        runner,
        test_options,
        num_gpus_available,
-                method="generate")
+        method="generate",
+    )
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -100,7 +100,6 @@ class PPTestSettings:
 # NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU
 # The values displayed here are only a rough indicator of the size of the model
-# yapf: disable
 TEXT_GENERATION_MODELS = {
    # [Decoder-only]
    # Uses Llama
@@ -150,7 +149,9 @@ TEXT_GENERATION_MODELS = {
    "adept/persimmon-8b-chat": PPTestSettings.fast(),
    "microsoft/phi-2": PPTestSettings.fast(),
    "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
-    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(multi_node_only=True, load_format="dummy"),  # noqa: E501
+    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(
+        multi_node_only=True, load_format="dummy"
+    ),  # noqa: E501
    "Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
    "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
    "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
@@ -196,7 +197,6 @@ MULTIMODAL_MODELS = {
    "Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(),
    "fixie-ai/ultravox-v0_5-llama-3_2-1b": PPTestSettings.fast(),
 }
-# yapf: enable
 # NOTE: You can update this on your local machine to run specific tests
 TEST_MODELS = [

--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -287,29 +287,15 @@ def test_prefix_cache_default():
    assert not engine_args.enable_prefix_caching
-# yapf: disable
+@pytest.mark.parametrize(
-@pytest.mark.parametrize(("arg", "expected", "option"), [
+    ("arg", "expected", "option"),
+    [
        (None, None, "mm-processor-kwargs"),
        ("{}", {}, "mm-processor-kwargs"),
-    (
+        ('{"num_crops": 4}', {"num_crops": 4}, "mm-processor-kwargs"),
-        '{"num_crops": 4}',
+        ('{"foo": {"bar": "baz"}}', {"foo": {"bar": "baz"}}, "mm-processor-kwargs"),
-        {
+    ],
-            "num_crops": 4
+)
-        },
-        "mm-processor-kwargs"
-    ),
-    (
-        '{"foo": {"bar": "baz"}}',
-        {
-            "foo":
-            {
-                "bar": "baz"
-            }
-        },
-        "mm-processor-kwargs"
-    ),
-])
-# yapf: enable
 def test_composite_arg_parser(arg, expected, option):
    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
    if arg is None:
@@ -321,8 +307,7 @@ def test_composite_arg_parser(arg, expected, option):
 def test_human_readable_model_len():
    # `exit_on_error` disabled to test invalid values below
-    parser = EngineArgs.add_cli_args(
+    parser = EngineArgs.add_cli_args(FlexibleArgumentParser(exit_on_error=False))
-        FlexibleArgumentParser(exit_on_error=False))
    args = parser.parse_args([])
    assert args.max_model_len is None

--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -15,6 +15,7 @@ from vllm.assets.video import VideoAsset
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import (
    _try_extract_ast,
+    apply_mistral_chat_template,
    load_chat_template,
    parse_chat_messages,
    parse_chat_messages_futures,
@@ -1855,17 +1856,17 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
 # NOTE: Qwen2-Audio default chat template is specially defined inside
 # processor class instead of using `tokenizer_config.json`
-# yapf: disable
 @pytest.mark.parametrize(
    ("model", "expected_format"),
-    [(PHI3V_MODEL_ID, "string"),
+    [
+        (PHI3V_MODEL_ID, "string"),
        (QWEN2VL_MODEL_ID, "openai"),
        (QWEN25VL_MODEL_ID, "openai"),
        (ULTRAVOX_MODEL_ID, "string"),
        (QWEN2AUDIO_MODEL_ID, "openai"),
-     (LLAMA_GUARD_MODEL_ID, "openai")],
+        (LLAMA_GUARD_MODEL_ID, "openai"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_hf_defined(model, expected_format):
    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
    model_info.check_available_online(on_fail="skip")
@@ -1879,7 +1880,8 @@ def test_resolve_content_format_hf_defined(model, expected_format):
        hf_overrides=model_info.hf_overrides,
        skip_tokenizer_init=model_info.skip_tokenizer_init,
        enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
    tokenizer = get_tokenizer(
        model,
@@ -1911,18 +1913,18 @@ def test_resolve_content_format_hf_defined(model, expected_format):
    assert resolved_format == expected_format
-# yapf: disable
 @pytest.mark.parametrize(
    ("model", "expected_format"),
-    [("Salesforce/blip2-opt-2.7b", "string"),
+    [
+        ("Salesforce/blip2-opt-2.7b", "string"),
        ("facebook/chameleon-7b", "string"),
        ("deepseek-ai/deepseek-vl2-tiny", "string"),
        ("adept/fuyu-8b", "string"),
        ("google/paligemma-3b-mix-224", "string"),
        ("Qwen/Qwen-VL", "string"),
-     ("Qwen/Qwen-VL-Chat", "string")],
+        ("Qwen/Qwen-VL-Chat", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_fallbacks(model, expected_format):
    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
    model_info.check_available_online(on_fail="skip")
@@ -1936,7 +1938,8 @@ def test_resolve_content_format_fallbacks(model, expected_format):
        hf_overrides=model_info.hf_overrides,
        skip_tokenizer_init=model_info.skip_tokenizer_init,
        enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
    tokenizer = get_tokenizer(
        model_config.tokenizer,
@@ -1968,10 +1971,10 @@ def test_resolve_content_format_fallbacks(model, expected_format):
    assert resolved_format == expected_format
-# yapf: disable
 @pytest.mark.parametrize(
    ("template_path", "expected_format"),
-    [("template_alpaca.jinja", "string"),
+    [
+        ("template_alpaca.jinja", "string"),
        ("template_baichuan.jinja", "string"),
        ("template_chatglm.jinja", "string"),
        ("template_chatglm2.jinja", "string"),
@@ -1989,9 +1992,9 @@ def test_resolve_content_format_fallbacks(model, expected_format):
        ("tool_chat_template_llama3.1_json.jinja", "openai"),
        ("tool_chat_template_llama3.2_json.jinja", "openai"),
        ("tool_chat_template_mistral_parallel.jinja", "string"),
-     ("tool_chat_template_mistral.jinja", "string")],
+        ("tool_chat_template_mistral.jinja", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_examples(template_path, expected_format):
    model_config = ModelConfig(
        PHI3V_MODEL_ID,  # Dummy
@@ -2024,40 +2027,34 @@ def test_resolve_content_format_examples(template_path, expected_format):
    assert resolved_format == expected_format
-def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
+def test_parse_chat_messages_include_thinking_chunk(
-                                                    mistral_tokenizer):
+    mistral_model_config, mistral_tokenizer
-    messages = [{
+):
-        "role":
+    messages = [
-        "system",
+        {
-        "content": [{
+            "role": "system",
-            "type": "text",
+            "content": [
-            "text": "You are a helpful assistant."
+                {"type": "text", "text": "You are a helpful assistant."},
-        }, {
+                {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
                    "type": "thinking",
                    "closed": True,
-            "thinking": "2+2 = 4"
+                    "thinking": "Only return the answer when you are confident.",
-        }, {
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
                    "type": "text",
                    "text": "The answer is 4.",
-        }],
+                },
-    }]
+            ],
+        },
+    ]
    conversation_with_thinking, _, _ = parse_chat_messages(
        messages,
@@ -2066,122 +2063,105 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
        content_format="openai",
    )
-    expected_conversation = [{
+    expected_conversation = [
-        "role":
+        {
-        "system",
+            "role": "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type": "text",
-            "text": "Only return the answer when you are confident."
-        }],
-    }, {
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What is 2+2?"
-        }],
-    }, {
-        "role":
-        "assistant",
            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
                {
                    "type": "text",
-                "text": "Let me think about it."
+                    "text": "Only return the answer when you are confident.",
+                },
+            ],
        },
        {
-                "type": "text",
+            "role": "user",
-                "text": "2+2 = 4"
+            "content": [{"type": "text", "text": "What is 2+2?"}],
        },
        {
-                "type": "text",
+            "role": "assistant",
-                "text": "The answer is 4."
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "text", "text": "2+2 = 4"},
+                {"type": "text", "text": "The answer is 4."},
+            ],
        },
    ]
-    }]
    assert conversation_with_thinking == expected_conversation
 def test_apply_mistral_chat_template_thinking_chunk():
-    # Moved import here to avoid yapf and isort conflicts
+    messages = [
-    from vllm.entrypoints.chat_utils import apply_mistral_chat_template
+        {
-    messages = [{
+            "role": "system",
-        "role":
+            "content": [
-        "system",
+                {"type": "text", "text": "You are a helpful assistant."},
-        "content": [{
+                {
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
                    "type": "thinking",
                    "closed": True,
-            "thinking": "2+2 = 4"
+                    "thinking": "Only return the answer when you are confident.",
-        }, {
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
                    "type": "text",
                    "text": "The answer is 4.",
-        }],
+                },
-    }, {
+            ],
-        "role": "user",
+        },
-        "content": "Thanks, what is 3+3?"
+        {"role": "user", "content": "Thanks, what is 3+3?"},
-    }]
+    ]
    # TODO(Julien): upon model release change to a tokenizer already configured.
    # =================================================================
    mistral_tokenizer = MistralTokenizer.from_pretrained(
-        "mistralai/Devstral-Small-2507")
+        "mistralai/Devstral-Small-2507"
+    )
    assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
    # Add think special tokens to the tokenizer
    mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
-        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
+        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
+    )
    mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
-        rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
+        rank=36, is_control=True, token_str=SpecialTokens.end_think.value
+    )
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
        k: v
-        for k, v in
+        for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
-        mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
        if v not in {35, 36}
    }
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.begin_think.value] = 35
+        SpecialTokens.begin_think.value
+    ] = 35
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.end_think.value] = 36
+        SpecialTokens.end_think.value
+    ] = 36
    mistral_tokenizer.instruct.BEGIN_THINK = 35
    mistral_tokenizer.instruct.END_THINK = 36
    # =================================================================
-    tokens_ids = apply_mistral_chat_template(mistral_tokenizer,
+    tokens_ids = apply_mistral_chat_template(
-                                             messages,
+        mistral_tokenizer, messages, chat_template=None, tools=None
-                                             chat_template=None,
+    )
-                                             tools=None)
    string_tokens = mistral_tokenizer.mistral.decode(
-        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP)
+        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP
+    )
    expected_tokens = (
        r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
        r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
        r"[INST]What is 2+2?[/INST]"
        r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
-        r"[INST]Thanks, what is 3+3?[/INST]")
+        r"[INST]Thanks, what is 3+3?[/INST]"
+    )
    assert string_tokens == expected_tokens
@@ -2192,37 +2172,32 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
 ):
    audio_uuid = "abcd"
    conversation, mm_data, mm_uuids = parse_chat_messages(
-        [{
+        [
-            "role":
+            {
-            "user",
+                "role": "user",
                "content": [
                    {
                        "type": "input_audio",
                        "input_audio": {},
                        "uuid": audio_uuid,
                    },
-                {
+                    {"type": "text", "text": "What does the audio say?"},
-                    "type": "text",
+                ],
-                    "text": "What does the audio say?"
+            }
-                },
        ],
-        }],
        qwen2_audio_model_config,
        qwen2_audio_tokenizer,
        content_format="string",
    )
-    assert conversation == [{
+    assert conversation == [
-        "role":
+        {
-        "user",
+            "role": "user",
-        "content":
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
+        }
-    }]
+    ]
    _assert_mm_data_inputs(mm_data, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
 @pytest.mark.asyncio
@@ -2232,34 +2207,29 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
 ):
    audio_uuid = "abcd"
    conversation, mm_future, mm_uuids = parse_chat_messages_futures(
-        [{
+        [
-            "role":
+            {
-            "user",
+                "role": "user",
                "content": [
                    {
                        "type": "input_audio",
                        "input_audio": {},
                        "uuid": audio_uuid,
                    },
-                {
+                    {"type": "text", "text": "What does the audio say?"},
-                    "type": "text",
+                ],
-                    "text": "What does the audio say?"
+            }
-                },
        ],
-        }],
        qwen2_audio_model_config,
        qwen2_audio_tokenizer,
        content_format="string",
    )
-    assert conversation == [{
+    assert conversation == [
-        "role":
+        {
-        "user",
+            "role": "user",
-        "content":
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
+        }
-    }]
+    ]
    _assert_mm_data_inputs(await mm_future, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -12,9 +12,6 @@ import torch
 import torch.nn.functional as F
 from vllm.config.lora import LoRAConfig
-# yapf conflicts with isort for this block
-# yapf: disable
 from vllm.lora.layers import (
    BaseLayerWithLoRA,
    ColumnParallelLinearWithLoRA,
@@ -32,8 +29,6 @@ from vllm.lora.layers import (
    RowParallelLinearWithShardedLoRA,
    VocabParallelEmbeddingWithLoRA,
 )
-# yapf: enable
 from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.punica_wrapper import get_punica_wrapper
 from vllm.model_executor.layers.linear import (

--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@@ -17,8 +17,6 @@ import vllm.model_executor.model_loader.tensorizer
 from tests.utils import VLLM_PATH, RemoteOpenAIServer
 from vllm import LLM, SamplingParams
 from vllm.engine.arg_utils import EngineArgs
-# yapf: disable
 from vllm.model_executor.model_loader.tensorizer import (
    TensorizerConfig,
    TensorSerializer,
@@ -29,8 +27,6 @@ from vllm.model_executor.model_loader.tensorizer import (
 from vllm.model_executor.model_loader.tensorizer_loader import (
    BLACKLISTED_TENSORIZER_ARGS,
 )
-# yapf: enable
 from vllm.utils import PlaceholderModule
 from .conftest import DummyExecutor, assert_from_collective_rpc

--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
--- a/tests/models/multimodal/generation/vlm_utils/case_filtering.py
+++ b/tests/models/multimodal/generation/vlm_utils/case_filtering.py
@@ -114,7 +114,6 @@ def get_parametrized_options(
                raise ValueError("Test has type CUSTOM_INPUTS, but none given")
            iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
-        # yapf: disable
        # Wrap all model cases in a pytest parameter & pass marks through
        return [
            pytest.param(
@@ -122,10 +121,10 @@ def get_parametrized_options(
                ExpandableVLMTestArgs(
                    **{k: v for k, v in zip(iter_kwargs.keys(), case)}
                ),
-                marks=test_info.marks if test_info.marks is not None else []
+                marks=test_info.marks if test_info.marks is not None else [],
-            ) for case in list(itertools.product(*iter_kwargs.values()))
+            )
+            for case in list(itertools.product(*iter_kwargs.values()))
        ]
-        # yapf: enable
    # Get a list per model type, where each entry contains a tuple of all of
    # that model type's cases, then flatten them into the top level so that

--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -418,7 +418,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
            self.image_size = self.vision_config.image_size
        def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
-            # yapf: disable
            from vllm.model_executor.models.h2ovl import (
                IMG_CONTEXT,
                IMG_END,
@@ -426,7 +425,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
                image_to_pixel_values_h2ovl,
            )
-            # yapf: enable
            images = [images] if isinstance(images, Image) else images
            pixel_values = [
                image_to_pixel_values_h2ovl(

--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@@ -33,24 +33,26 @@ TEST_IMG_PLACEHOLDER = "<vlm_image>"
 TEST_VIDEO_PLACEHOLDER = "<vlm_video>"
 TEST_AUDIO_PLACEHOLDER = "<lmm_audio>"
-# yapf: disable
+SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts(
-SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts({
+    {
        "stop_sign": f"{TEST_IMG_PLACEHOLDER}What's the content of the image?",
        "cherry_blossom": f"{TEST_IMG_PLACEHOLDER}What is the season?",
-})
+    }
-SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts({
+)
+SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts(
+    {
        "mary_had_lamb": f"{TEST_AUDIO_PLACEHOLDER}Transcribe this audio into English.",  # noqa: E501
        "winning_call": f"{TEST_AUDIO_PLACEHOLDER}What is happening in this audio clip?",  # noqa: E501
-})
+    }
+)
 MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PLACEHOLDER}Describe the two images in detail.\n"  # noqa: E501
 VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
-IMAGE_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
+IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
-EMBEDDING_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0)]
+EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
 RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
-# yapf: enable
 class PromptWithMultiModalInput(NamedTuple):

--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -322,8 +322,9 @@ def _test_processing_correctness_one(
        )
-# yapf: disable
+@pytest.mark.parametrize(
-@pytest.mark.parametrize("model_id", [
+    "model_id",
+    [
        "rhymes-ai/Aria",
        "CohereForAI/aya-vision-8b",
        "Salesforce/blip2-opt-2.7b",
@@ -391,11 +392,11 @@ def _test_processing_correctness_one(
        "omni-research/Tarsier-7b",
        "omni-research/Tarsier2-Recap-7b",
        "mistralai/Voxtral-Mini-3B-2507",
-])
+    ],
+)
 @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
 @pytest.mark.parametrize("num_batches", [32])
 @pytest.mark.parametrize("simplify_rate", [1.0])
-# yapf: enable
 def test_processing_correctness(
    model_id: str,
    hit_rate: float,

--- a/tests/models/multimodal/processing/test_idefics3.py
+++ b/tests/models/multimodal/processing/test_idefics3.py
@@ -12,7 +12,6 @@ from ...utils import build_model_context
 @pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"])
-# yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
    [
@@ -20,7 +19,6 @@ from ...utils import build_model_context
        ({"size": {"longest_edge": 728}}, 169 * (2**2 + 1)),
    ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(

--- a/tests/models/multimodal/processing/test_phi3v.py
+++ b/tests/models/multimodal/processing/test_phi3v.py
@@ -11,7 +11,6 @@ from ...utils import build_model_context
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-3.5-vision-instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
    [
@@ -21,7 +20,6 @@ from ...utils import build_model_context
        ({}, 757),
    ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(

--- a/tests/models/multimodal/processing/test_phi4mm.py
+++ b/tests/models/multimodal/processing/test_phi4mm.py
@@ -11,7 +11,6 @@ from ...utils import build_model_context
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-multimodal-instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
    [
@@ -21,7 +20,6 @@ from ...utils import build_model_context
        ({}, 9585),
    ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(

--- a/tests/models/multimodal/processing/test_qwen2_vl.py
+++ b/tests/models/multimodal/processing/test_qwen2_vl.py
@@ -10,7 +10,6 @@ from ...utils import build_model_context
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img", "expected_pixels_shape"),
    [
@@ -18,7 +17,6 @@ from ...utils import build_model_context
        ({"min_pixels": 64**2, "max_pixels": 512**2}, 330, (1320, 1176)),
    ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(

--- a/tests/models/multimodal/processing/test_smolvlm.py
+++ b/tests/models/multimodal/processing/test_smolvlm.py
@@ -12,7 +12,6 @@ from ...utils import build_model_context
 @pytest.mark.parametrize("model_id", ["HuggingFaceTB/SmolVLM2-2.2B-Instruct"])
-# yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
    [
@@ -20,7 +19,6 @@ from ...utils import build_model_context
        ({"max_image_size": {"longest_edge": 768}}, 405),
    ],
 )
-# yapf: enable
 @pytest.mark.parametrize("num_imgs", [1, 2])
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(

--- a/tests/models/multimodal/processing/test_transformers.py
+++ b/tests/models/multimodal/processing/test_transformers.py
@@ -7,9 +7,7 @@ from vllm.config import ModelConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
-# yapf: disable
+@pytest.mark.parametrize("model_id", ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
-@pytest.mark.parametrize("model_id",
-                         ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 def test_multimodal_processor(model_id):
    model_config = ModelConfig(
        model=model_id,
@@ -18,7 +16,7 @@ def test_multimodal_processor(model_id):
    mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
-    image_pil = ImageAsset('cherry_blossom').pil_image
+    image_pil = ImageAsset("cherry_blossom").pil_image
    mm_data = {"image": image_pil}
    str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n"  # noqa: E501
    str_processed_inputs = mm_processor.apply(
@@ -28,8 +26,23 @@ def test_multimodal_processor(model_id):
    )
    ids_prompt = [
-        151644, 872, 220, 151646, 198, 3838, 374, 279, 2213, 315, 419, 2168,
+        151644,
-        30, 151645, 151644, 77091, 198
+        872,
+        220,
+        151646,
+        198,
+        3838,
+        374,
+        279,
+        2213,
+        315,
+        419,
+        2168,
+        30,
+        151645,
+        151644,
+        77091,
+        198,
    ]
    ids_processed_inputs = mm_processor.apply(
        prompt=ids_prompt,
@@ -37,5 +50,7 @@ def test_multimodal_processor(model_id):
        hf_processor_mm_kwargs={},
    )
-    assert (str_processed_inputs["prompt_token_ids"]
+    assert (
-            == ids_processed_inputs["prompt_token_ids"])
+        str_processed_inputs["prompt_token_ids"]
+        == ids_processed_inputs["prompt_token_ids"]
+    )