[Bugfix][CI] fix typos (#34934)

Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[Bugfix][CI] fix typos (#34934)
Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6a895197 · Jiayi Yan · GitHub · 8c760b6a · 6a895197 · 6a895197
Unverified Commit 6a895197 authored Mar 06, 2026 by Jiayi Yan Committed by GitHub Mar 05, 2026
20 changed files
--- a/tests/kernels/moe/test_modular_kernel_combinations.py
+++ b/tests/kernels/moe/test_modular_kernel_combinations.py
@@ -162,7 +162,7 @@ Ns = [1024]
 TOPKs = [4, 1]
 Es = [32]
 DTYPEs = [torch.bfloat16]
-FUSED_MOE_CHUNK_SIZEs = [None, 16]
+FUSED_MOE_CHUNK_SIZES = [None, 16]


 def is_nyi_config(config: Config) -> bool:
@@ -192,7 +192,7 @@ def generate_valid_test_cases(
        DTYPEs,
        MK_QUANT_CONFIGS,
        product(prepare_finalize_types, MK_FUSED_EXPERT_TYPES),
-        FUSED_MOE_CHUNK_SIZEs,
+        FUSED_MOE_CHUNK_SIZES,
    ):
        total = total + 1

@@ -266,7 +266,7 @@ def test_modular_kernel_combinations_multigpu(
    if cuda_device_count_stateless() < world_size:
        pytest.skip(
            f"Not enough GPUs available to run, got "
-            f"{cuda_device_count_stateless()} exepected "
+            f"{cuda_device_count_stateless()} expected "
            f"{world_size}."
        )


--- a/tests/models/language/generation/test_mistral.py
+++ b/tests/models/language/generation/test_mistral.py
@@ -87,7 +87,7 @@ MSGS = [
    {
        "role": "user",
        "content": "Could you please rewrite the below article? \n\n My English needs "
-        "improvving, maybe I make errors.",
+        "improving, maybe I make errors.",
    },
    {
        "role": "assistant",
@@ -98,7 +98,7 @@ MSGS = [
                "type": "function",
                "function": {
                    "name": "rewrite",
-                    "arguments": '{"text":"My English needs improvving, maybe '
+                    "arguments": '{"text":"My English needs improving, maybe '
                    'I make errors."}',
                },
            }

--- a/tests/models/language/pooling/test_bge_m3.py
+++ b/tests/models/language/pooling/test_bge_m3.py
@@ -14,7 +14,7 @@ MAX_MODEL_LEN = 512


 # Example from https://huggingface.co/BAAI/bge-m3
-sentences_1 = ["What is BGE M3?", "Defination of BM25"]
+sentences_1 = ["What is BGE M3?", "Definition of BM25"]
 sentences_2 = [
    "BGE M3 is an embedding model supporting dense retrieval, "
    "lexical matching and multi-vector interaction.",

--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -719,7 +719,7 @@ def isaac_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
        # Convert to tuple or None
        all_hidden_states = tuple(hidden_states_list) if output_hidden_states else None

-        # Include hiden_states for compatibility with hidden_states_to_seq_logprobs()
+        # Include hidden_states for compatibility with hidden_states_to_seq_logprobs()
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values,
@@ -1226,7 +1226,7 @@ def voxtral_patch_hf_runner(hf_model: "HfRunner") -> "HfRunner":
       dicts (accepting ``url``, ``path``, or ``base64`` audio) rather than
       the standard ``processor(text=, audio=, sampling_rate=)`` interface.
    2. HfRunner.get_inputs cannot handle multi-audio per prompt because it
-       mis-unpacks ``[(arr1, sr1), (arr2, sr2)]`` via a ``len == 2`` check.
+       incorrectly unpacks ``[(arr1, sr1), (arr2, sr2)]`` via a ``len == 2`` check.

    We override ``get_inputs`` to build conversation dicts and call
    ``apply_chat_template`` directly, bypassing both issues. We also wrap

--- a/tests/quantization/test_blackwell_moe.py
+++ b/tests/quantization/test_blackwell_moe.py
@@ -25,7 +25,7 @@ def set_test_environment():
    os.environ["FLASHINFER_NVCC_THREADS"] = "16"


-# Overide the backbone layers to 4 for faster startup
+# Override the backbone layers to 4 for faster startup
 HF_OVERRIDE_TEXT = {
    "num_layers": 4,
    "num_hidden_layers": 4,

--- a/tests/renderers/test_hf.py
+++ b/tests/renderers/test_hf.py
@@ -206,8 +206,8 @@ def test_resolve_chat_template_kwargs(sample_json_schema, model, expected_kwargs

    chat_template_kwargs = {
        # both unused
-        "unsed_kwargs_1": 123,
-        "unsed_kwargs_2": "abc",
+        "unused_kwargs_1": 123,
+        "unused_kwargs_2": "abc",
        # should not appear
        "chat_template": "{% Hello world! %}",
        "tokenize": True,

--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -853,7 +853,7 @@ def test_vllm_config_defaults_are_none():


 @pytest.mark.parametrize(
-    ("model_id", "compiliation_config", "optimization_level"),
+    ("model_id", "compilation_config", "optimization_level"),
    [
        (
            None,
@@ -895,7 +895,7 @@ def test_vllm_config_defaults_are_none():
        ("RedHatAI/DeepSeek-V2.5-1210-FP8", CompilationConfig(), OptimizationLevel.O3),
    ],
 )
-def test_vllm_config_defaults(model_id, compiliation_config, optimization_level):
+def test_vllm_config_defaults(model_id, compilation_config, optimization_level):
    """Test that optimization-level defaults are correctly applied."""

    model_config = None
@@ -903,12 +903,12 @@ def test_vllm_config_defaults(model_id, compiliation_config, optimization_level)
        model_config = ModelConfig(model_id)
        vllm_config = VllmConfig(
            model_config=model_config,
-            compilation_config=compiliation_config,
+            compilation_config=compilation_config,
            optimization_level=optimization_level,
        )
    else:
        vllm_config = VllmConfig(
-            compilation_config=compiliation_config,
+            compilation_config=compilation_config,
            optimization_level=optimization_level,
        )
    # Use the global optimization level defaults

--- a/tests/tool_parsers/test_seed_oss_tool_parser.py
+++ b/tests/tool_parsers/test_seed_oss_tool_parser.py
@@ -106,7 +106,7 @@ def test_extract_tool_calls_no_tools(seed_oss_tool_parser):
 @pytest.mark.parametrize(
    ids=[
        "tool_call_0_thinking_budget",
-        "tool_call_512_thinkg_budget",
+        "tool_call_512_thinking_budget",
        "tool_call_unlimited_thinking_budget",
    ],
    argnames=["model_output", "expected_tool_calls", "expected_content"],
@@ -308,7 +308,7 @@ def stream_delta_message_generator(
 @pytest.mark.parametrize(
    ids=[
        "tool_call_0_thinking_budget",
-        "tool_call_512_thinkg_budget",
+        "tool_call_512_thinking_budget",
        "tool_call_unlimited_thinking_budget",
    ],
    argnames=["model_output", "expected_tool_calls", "expected_content"],

--- a/tests/transformers_utils/test_repo_utils.py
+++ b/tests/transformers_utils/test_repo_utils.py
@@ -34,10 +34,10 @@ def test_list_filtered_repo_files(
        subfolder.mkdir()
        (path_tmp_dir / "json_file.json").touch()
        (path_tmp_dir / "correct_2.txt").touch()
-        (path_tmp_dir / "uncorrect.txt").touch()
-        (path_tmp_dir / "uncorrect.jpeg").touch()
+        (path_tmp_dir / "incorrect.txt").touch()
+        (path_tmp_dir / "incorrect.jpeg").touch()
        (subfolder / "correct.txt").touch()
-        (subfolder / "uncorrect_sub.txt").touch()
+        (subfolder / "incorrect_sub.txt").touch()

        def _glob_path() -> list[str]:
            return [
@@ -86,7 +86,7 @@ def test_one_filtered_repo_files(allow_patterns: list[str], expected_bool: bool)
        path_tmp_dir = Path(tmp_dir)
        subfolder = path_tmp_dir / "subfolder"
        subfolder.mkdir()
-        (path_tmp_dir / "uncorrect.jpeg").touch()
+        (path_tmp_dir / "incorrect.jpeg").touch()
        (subfolder / "correct.txt").touch()

        def _glob_path() -> list[str]:

--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -308,7 +308,7 @@ def test_free_kv_cache_block_queue_append_n():

    # Create an empty FreeKVCacheBlockQueue
    invalid_queue = FreeKVCacheBlockQueue([])
-    # set prev_free_block to None and this will cause assertation in append_n
+    # set prev_free_block to None and this will cause assertion in append_n
    invalid_queue.fake_free_list_tail.prev_free_block = None
    with pytest.raises(AssertionError):
        # Append 1 block

--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -2304,22 +2304,22 @@ def test_block_lookup_cache_single_block_per_key():
    assert cache.get_one_block(key0) is block0
    assert cache.get_one_block(key1) is block1
    assert cache.get_one_block(key2) is None
-    # No block poped due to block_id mismatch
+    # No block popped due to block_id mismatch
    assert cache.pop(key0, 100) is None
    assert cache.get_one_block(key0) is block0
    assert cache.get_one_block(key1) is block1
    assert cache.get_one_block(key2) is None
-    # block poped with (key0, block ID 0)
+    # block popped with (key0, block ID 0)
    assert cache.pop(key0, 0) is block0
    assert cache.get_one_block(key0) is None
    assert cache.get_one_block(key1) is block1
    assert cache.get_one_block(key2) is None
-    # No block poped due to block_id mismatch
+    # No block popped due to block_id mismatch
    assert cache.pop(key0, 1) is None
    assert cache.get_one_block(key0) is None
    assert cache.get_one_block(key1) is block1
    assert cache.get_one_block(key2) is None
-    # block poped with (key1, block ID 1)
+    # block popped with (key1, block ID 1)
    assert cache.pop(key1, 1) is block1
    assert cache.get_one_block(key0) is None
    assert cache.get_one_block(key1) is None

--- a/tests/v1/core/test_priority_scheduler_random.py
+++ b/tests/v1/core/test_priority_scheduler_random.py
@@ -140,7 +140,7 @@ def _mock_draft_token_ids(
    return DraftTokenIds(req_ids=request_ids, draft_token_ids=sampled_token_ids)


-def _chech_valid_scheduler_output(
+def _check_valid_scheduler_output(
    scheduler_output: SchedulerOutput,
    seen_request_ids: set[str],
    seen_mm_hashes: set[str],
@@ -242,7 +242,7 @@ def test_priority_scheduling_blast(
                )
                scheduler.add_request(req)
        scheduler_output = scheduler.schedule()
-        _chech_valid_scheduler_output(
+        _check_valid_scheduler_output(
            scheduler_output, seen_request_ids, seen_mm_hashes
        )
        model_output = _mock_execute_model(

--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -1116,7 +1116,7 @@ def _step_until_done(


 def _step_until_kv_transfer_finished(scheduler: Scheduler, req_ids: list[str]):
-    """Cycle requests through a KV transfer cyle."""
+    """Cycle requests through a KV transfer cycle."""

    # Requests should first transition to WAITING_FOR_REMOTE_KVS
    output = scheduler.schedule()
@@ -2714,7 +2714,7 @@ def _assert_right_encoder_inputs(
        if expected_total_reqs == 0:
            return

-    # Number of expected enocder inputs should match number of requests
+    # Number of expected encoder inputs should match number of requests
    if expected_encoder_inputs:
        assert check_exist and requests is not None  # only support expect input exist
        assert len(requests) == len(expected_encoder_inputs)
@@ -2964,7 +2964,7 @@ def test_ec_connector_with_partial_cache_hit_multi_round(use_kv_connector):
    )
    scheduler.update_from_output(output, model_output)

-    # request1 is finished after outputing 1 token
+    # request1 is finished after outputting 1 token
    # Finish request
    scheduler.finish_requests(request1.request_id, RequestStatus.FINISHED_LENGTH_CAPPED)

@@ -3060,14 +3060,14 @@ def test_ec_connector_schedule_multiple_requests(cache_exist, use_kv_connector):
    for request in requests:
        scheduler.add_request(request)

-    # Set up to test different encoder cache exsistence scenario after preemption
+    # Set up to test different encoder cache existence scenario after preemption
    # Order of getting encoder cache should be: local cache -> connector-> compute
    scheduler.ec_connector.update_state_after_alloc = Mock(
        wraps=scheduler.ec_connector.update_state_after_alloc
    )

    if cache_exist == "local":
-        # Allocate cache to cache manager manually to mimick
+        # Allocate cache to cache manager manually to mimic
        for req in requests:
            scheduler.encoder_cache_manager.allocate(req, 0)
    else:
@@ -3384,13 +3384,13 @@ def test_priority_scheduling_ec_connector_preemption_and_resumption(
        pooler_output=[],
    )
    # Finish the requests to make room for the preempted requests to resume
-    # req_high is finished after outputing 2 tokens
+    # req_high is finished after outputting 2 tokens
    scheduler.update_from_output(output, model_output)
    scheduler.finish_requests(
        request_high.request_id, RequestStatus.FINISHED_LENGTH_CAPPED
    )

-    # Set up to test different encoder cache exsistence scenario after preemption
+    # Set up to test different encoder cache existence scenario after preemption
    # Order of getting encoder cache should be: local cache -> connector-> compute
    # By default, the cache should still exist in local in this test case
    if cache_exist != "local":
@@ -3483,7 +3483,7 @@ def test_ec_connector_allocate_encoder_tokens_with_external_load(use_kv_connecto
        ec_role="ec_consumer",
    )

-    # Limit the number of availiable slots of EncoderCacheManager
+    # Limit the number of available slots of EncoderCacheManager
    scheduler.encoder_cache_manager = EncoderCacheManager(cache_size=32)

    # Create MM request1
@@ -3574,7 +3574,7 @@ def test_ec_connector_allocate_encoder_tokens_with_external_load(use_kv_connecto
    )
    scheduler.update_from_output(output, model_output)

-    # request1 is finished after outputing 1 token
+    # request1 is finished after outputting 1 token
    # Finish request
    scheduler.finish_requests(request1.request_id, RequestStatus.FINISHED_LENGTH_CAPPED)
    assert scheduler.get_num_unfinished_requests() == 1

--- a/tests/v1/e2e/test_mamba_prefix_cache.py
+++ b/tests/v1/e2e/test_mamba_prefix_cache.py
@@ -76,11 +76,11 @@ def get_fake_sample_fn() -> SamplerOutput:
                ),
                logprobs_tensors=None,
            )
-        accpeted_tokens = prompt_token_ids[
+        accepted_tokens = prompt_token_ids[
            first_token_id_index : first_token_id_index
            + min(num_accepted_tokens, logits.shape[0])
        ]
-        sampled_token_ids = accpeted_tokens
+        sampled_token_ids = accepted_tokens
        return SamplerOutput(
            sampled_token_ids=torch.tensor(
                [sampled_token_ids], device="cuda", dtype=torch.int32

--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -911,7 +911,7 @@ def test_structured_output_with_structural_tag(backend: str):
        ),
    )

-    prompt = "Hello and repete hello 10 times, do not say anything else. Only say hello hello hello, now start"
+    prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start"
    outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True)
    assert outputs is not None
    for output in outputs:

--- a/tests/v1/kv_connector/unit/test_kv_connector_lifecyle.py
+++ b/tests/v1/kv_connector/unit/test_kv_connector_lifecycle.py
--- a/tests/v1/kv_connector/unit/test_moriio_connector.py
+++ b/tests/v1/kv_connector/unit/test_moriio_connector.py
@@ -99,7 +99,7 @@ def _setup_kv_transfer_request(request, remote_host="127.0.0.1", fake_port=4789)
    return request


-class FakeMorIIOWrapper:
+class FakeMoRIIOWrapper:
    # A fake MoRIIOWrapper for testing purposes
    def __init__(self, *args, **kwargs):
        pass
@@ -168,7 +168,7 @@ class FakeMorIIOWrapper:
        pass


-class FakeMorIIOConnectorWorker(MoRIIOConnectorWorker):
+class FakeMoRIIOConnectorWorker(MoRIIOConnectorWorker):
    # Define a fake remote engine id for testing
    REMOTE_ENGINE_ID = "remote_engine"

@@ -373,7 +373,7 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode):
    # Set remote block ids to be fetched.
    request.kv_transfer_params["remote_block_ids"] = block_list

-    # Remote Prefill, triggers MorIIOConnectorMetadata.
+    # Remote Prefill, triggers MoRIIOConnectorMetadata.

    scheduler_output = scheduler.schedule()
    kv_connector_metadata = scheduler_output.kv_connector_metadata
@@ -451,7 +451,7 @@ def test_register_kv_caches(mock_parallel_groups):

        with set_current_vllm_config(vllm_config):
            connector = MoRIIOConnector(vllm_config, KVConnectorRole.WORKER)
-            connector.connector_worker = FakeMorIIOConnectorWorker(
+            connector.connector_worker = FakeMoRIIOConnectorWorker(
                vllm_config, connector.engine_id, hand_shake_latency=0
            )

@@ -528,7 +528,7 @@ def test_moriio_handshake_returns_metadata(mock_parallel_groups):
    with (
        patch(
            "vllm.distributed.kv_transfer.kv_connector.v1.moriio.moriio_engine.MoRIIOWrapper",
-            FakeMorIIOWrapper,
+            FakeMoRIIOWrapper,
        ),
    ):
        handshake_port = _find_free_port()

--- a/tests/v1/kv_connector/unit/test_nixl_connector.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector.py
@@ -460,9 +460,9 @@ class FakeNixlConnectorWorker(NixlConnectorWorker):

        # When remote tp_size > local tp_size, handshake with multiple
        # remote ranks.
-        num_hanshakes = 1 if tp_ratio > 0 else -tp_ratio
+        num_handshakes = 1 if tp_ratio > 0 else -tp_ratio
        remote_agents: dict[int, str] = {}
-        for remote_tp_rank in range(num_hanshakes):
+        for remote_tp_rank in range(num_handshakes):
            remote_agent_name = self.add_remote_agent(
                NixlAgentMetadata(
                    engine_id=self.REMOTE_ENGINE_ID,
@@ -688,7 +688,7 @@ class TestNixlHandshake:
        )
        check_handshake(2)

-        # NOTE flexiblity: a second remote with higher number of ranks is
+        # NOTE flexibility: a second remote with higher number of ranks is
        # discovered. This is not a scenario we actively support right now, but
        # the connector allows it.
        worker.REMOTE_ENGINE_ID = "remote_engine_2"
@@ -1766,7 +1766,7 @@ def test_aborted_request_removed_from_worker_in_batch(default_vllm_config, dist_
    req = create_request(request_id=1, do_remote_decode=True, max_tokens=1)
    scheduler.add_request(req)

-    # First scheduling pass - examinate build_connector_meta output
+    # First scheduling pass - examine build_connector_meta output
    sched_out = scheduler.schedule()
    kv_meta = sched_out.kv_connector_metadata
    assert kv_meta is not None

--- a/tests/v1/sample/test_logprobs.py
+++ b/tests/v1/sample/test_logprobs.py
@@ -36,7 +36,7 @@ SAMPLE_PROMPT = BatchLogprobsComposition.SAMPLE_PROMPT
 # non-associative and sensitive to batch geometry. The ref LLM (no spec
 # decode, default scheduling) and the spec-decode LLM (chunked prefill,
 # different effective batch sizes) follow different reduction orders,
-# producing numerically divergent logprobs that get mis-attributed to
+# producing numerically divergent logprobs that get misattributed to
 # spec-decode incorrectness.
 #
 # Force LLM instances into an identical, deterministic execution

--- a/tests/v1/sample/test_rejection_sampler.py
+++ b/tests/v1/sample/test_rejection_sampler.py
@@ -726,7 +726,7 @@ def test_frequency_penalties(rejection_sampler):
    spec_tokens = [[1, 1, 1], [], [1, 1, 1]]
    output_tokens = [[1, 1, 1, 1], [7], [1, 1, 1, 1]]  # 1, 7 and 1 are the bonus tokens

-    num_requsts = len(spec_tokens)
+    num_requests = len(spec_tokens)
    logits = create_logits_tensor(output_tokens, token_idx_to_override=15)
    metadata = create_sampling_metadata(
        all_greedy=True,
@@ -734,8 +734,8 @@ def test_frequency_penalties(rejection_sampler):
        spec_token_ids=spec_tokens,
        prompt_token_ids=torch.tensor([[5, 6, 7], [6, 7, 8], [7, 8, 9]], device=DEVICE),
        frequency_penalties=[1.5, 1.5, 0.7],
-        presence_penalties=[0.0] * num_requsts,
-        repetition_penalties=[1.0] * num_requsts,
+        presence_penalties=[0.0] * num_requests,
+        repetition_penalties=[1.0] * num_requests,
    )
    bonus_token_tensor = torch.tensor(
        [output_tokens[i][-1] for i in range(len(output_tokens))], device=logits.device