[CI] change spell checker from codespell to typos (#18711)

Signed-off-by: Andy Xie <andy.xning@gmail.com>

[CI] change spell checker from codespell to typos (#18711)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
2f1c19b2 · Ning Xie · GitHub · 42f52cc9 · 2f1c19b2 · 2f1c19b2
Unverified Commit 2f1c19b2 authored Jun 12, 2025 by Ning Xie Committed by GitHub Jun 11, 2025
20 changed files
--- a/tests/lora/test_transfomers_model.py
+++ b/tests/lora/test_transfomers_model.py
--- a/tests/models/language/generation/test_bart.py
+++ b/tests/models/language/generation/test_bart.py
@@ -118,7 +118,7 @@ def run_test(
    # default to enforce_eager=True if enforce_eager
    # is left unspecified. However, the
    # VllmRunner test fixture (which wraps around the LLM class) defaults to
-    # enforce_eager=False (a behavior which a number of already-exisitng
+    # enforce_eager=False (a behavior which a number of already-existing
    # decoder-only unit tests expect), so when testing an encoder/decoder
    # model we must explicitly specify enforce_eager=True in the VllmRunner
    # constructor.

--- a/tests/samplers/test_typical_acceptance_sampler.py
+++ b/tests/samplers/test_typical_acceptance_sampler.py
@@ -248,7 +248,7 @@ def test_temperature_zero_target_distribution(seed: int, device: str):
                                    size=(batch_size, 1),
                                    dtype=torch.int64)
    # The target probability distribution is a temperature zero distribution
-    # with zero entroy. Since our draft token ids don't match the probability
+    # with zero entropy. Since our draft token ids don't match the probability
    # 1.0 tokens in the target distribution we will reject all of them and
    # fallback to the greedy sampling for selecting 1 token for each sequence.
    # Verify the same.

--- a/tests/spec_decode/e2e/test_eagle_correctness.py
+++ b/tests/spec_decode/e2e/test_eagle_correctness.py
@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
    * Test greedy equality under various number of speculative tokens.

 With those tests, we can say at least, EAGLE would not break the
-correctess for the target model outputs.
+correctness for the target model outputs.
 """

 import pytest

--- a/tests/spec_decode/e2e/test_medusa_correctness.py
+++ b/tests/spec_decode/e2e/test_medusa_correctness.py
@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
    * Test greedy equality under various number of speculative tokens.

 With those tests, we can say at least, Medusa would not break the
-correctess for the target model outputs.
+correctness for the target model outputs.
 """

 import pytest

--- a/tests/spec_decode/e2e/test_mtp_correctness.py
+++ b/tests/spec_decode/e2e/test_mtp_correctness.py
@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
    * Test greedy equality under various number of speculative tokens.

 With those tests, we can say at least, mtp would not break the
-correctess for the target model outputs.
+correctness for the target model outputs.
 """

 import pytest

--- a/tests/spec_decode/e2e/test_ngram_correctness.py
+++ b/tests/spec_decode/e2e/test_ngram_correctness.py
@@ -22,8 +22,8 @@ However, we still need to verify below scenario could be passed:
    * Test greedy equality under preemption
    * Test greedy equality under various ngram sizes / speculative sizes

-With those tests, we can say at least, ngram spec would not break the correctess
-for the target model outputs.
+With those tests, we can say at least, ngram spec would not break the
+correctness for the target model outputs.
 """

 import pytest

--- a/tests/v1/e2e/test_correctness_sliding_window.py
+++ b/tests/v1/e2e/test_correctness_sliding_window.py
@@ -30,7 +30,7 @@ model_config = {
    ])
 @pytest.mark.parametrize("batch_size", [5])
 @pytest.mark.parametrize("seed", [1])
-def test_sliding_window_retrival(monkeypatch, model, batch_size, seed):
+def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed):
    """
    The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then
    asks for value of one of them (which is outside the sliding window).

--- a/tests/v1/kv_connector/unit/test_nixl_connector.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector.py
@@ -7,7 +7,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
 from .utils import create_request, create_scheduler, create_vllm_config


-def test_basic_inferface():
+def test_basic_interface():
    """Unit test for basic NixlConnector interface functionality."""

    vllm_config = create_vllm_config()
@@ -25,7 +25,7 @@ def test_basic_inferface():

    scheduler.add_request(request)

-    # Remote Prefill, triggers NixlConnectorMetdata.
+    # Remote Prefill, triggers NixlConnectorMetadata.
    scheduler_output = scheduler.schedule()
    kv_connector_metadata = scheduler_output.kv_connector_metadata
    assert kv_connector_metadata is not None

--- a/tests/v1/sample/test_logprobs_e2e.py
+++ b/tests/v1/sample/test_logprobs_e2e.py
@@ -32,7 +32,7 @@ def test_prompt_logprobs_e2e():
            ), f"Expected: {EXPECTED_VALUE} |  Measured: {measured_value}"


-def test_promt_logprobs_e2e_server():
+def test_prompt_logprobs_e2e_server():
    with RemoteOpenAIServer(MODEL, SERVER_ARGS) as remote_server:
        url = f"{remote_server.url_for('v1')}/completions"


--- a/tests/worker/test_model_input.py
+++ b/tests/worker/test_model_input.py
@@ -209,32 +209,32 @@ def test_multi_step_model_runner_input():
    received_model_input = (StatefulModelInput.from_broadcasted_tensor_dict(
        tensor_dict, attn_backend=attn_backend))

-    receieved_frozen_input = received_model_input.frozen_model_input
+    received_frozen_input = received_model_input.frozen_model_input

    # Check that received copy has correct values.
    assert isinstance(received_model_input, StatefulModelInput)
-    assert receieved_frozen_input.input_tokens is not None
-    assert (receieved_frozen_input.input_tokens ==
+    assert received_frozen_input.input_tokens is not None
+    assert (received_frozen_input.input_tokens ==
            frozen_model_input.input_tokens).all()
-    assert receieved_frozen_input.input_positions is not None
-    assert (receieved_frozen_input.input_positions ==
+    assert received_frozen_input.input_positions is not None
+    assert (received_frozen_input.input_positions ==
            frozen_model_input.input_positions).all()
-    assert receieved_frozen_input.multi_modal_kwargs is None
+    assert received_frozen_input.multi_modal_kwargs is None
    assert (frozen_model_input.multi_modal_kwargs ==
            frozen_model_input.multi_modal_kwargs)
-    assert receieved_frozen_input.lora_requests is None
-    assert (receieved_frozen_input.lora_requests ==
+    assert received_frozen_input.lora_requests is None
+    assert (received_frozen_input.lora_requests ==
            frozen_model_input.lora_requests)
-    assert receieved_frozen_input.lora_mapping is None
+    assert received_frozen_input.lora_mapping is None
    assert (
-        receieved_frozen_input.lora_mapping == frozen_model_input.lora_mapping)
+        received_frozen_input.lora_mapping == frozen_model_input.lora_mapping)
    for field in dataclasses.fields(AttentionMetadata):
-        assert getattr(receieved_frozen_input.attn_metadata, field.name,
+        assert getattr(received_frozen_input.attn_metadata, field.name,
                       None) == getattr(attn_metadata, field.name, None)
    # For sampling metadata, only selected_token_indices is copied.
-    assert (receieved_frozen_input.sampling_metadata.selected_token_indices ==
+    assert (received_frozen_input.sampling_metadata.selected_token_indices ==
            sampling_metadata.selected_token_indices)
-    assert receieved_frozen_input.sampling_metadata.seq_groups is None
+    assert received_frozen_input.sampling_metadata.seq_groups is None

    # check non frozen fields
    assert received_model_input.is_last_step == model_input.is_last_step

--- a/tools/report_build_time_ninja.py
+++ b/tools/report_build_time_ninja.py
@@ -116,7 +116,7 @@ def ReadTargets(log, show_all):
            # If ninja.exe is rudely halted then the .ninja_log file may be
            # corrupt. Silently continue.
            continue
-        start, end, _, name, cmdhash = parts  # Ignore restat.
+        start, end, _, name, cmdhash = parts  # Ignore restat mtime.
        # Convert from integral milliseconds to float seconds.
        start = int(start) / 1000.0
        end = int(end) / 1000.0

--- a/typos.toml
+++ b/typos.toml
+[files]
+# these files may contain non-English words
+extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "vllm/third_party/*"]
+ignore-hidden = true
+ignore-files = true
+ignore-dot = true
+ignore-vcs = true
+ignore-global = true
+ignore-parent = true
+
+[default]
+binary = false
+check-filename = false
+check-file = true
+unicode = true
+ignore-hex = true
+identifier-leading-digits = false
+locale = "en"
+extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
+    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
+    ".*ot.*", ".*[Tt]h[rR].*"]
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[default.extend-identifiers]
+bbc5b7ede = "bbc5b7ede"
+womens_doubles = "womens_doubles"
+v_2nd = "v_2nd"
+splitted_input = "splitted_input"
+NOOPs = "NOOPs"
+typ = "typ"
+nin_shortcut = "nin_shortcut"
+UperNetDecoder = "UperNetDecoder"
+subtile = "subtile"
+cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
+SFOuput = "SFOuput"
+# huggingface transformers repo uses these words
+depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
+DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
+depthwise_seperable_CNN = "depthwise_seperable_CNN"
+
+[default.extend-words]
+iy = "iy"
+tendencias = "tendencias"
+# intel cpu features
+tme = "tme"
+dout = "dout"
+Pn = "Pn"
+arange = "arange"
+
+[type.py]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.py.extend-identifiers]
+arange = "arange"
+NDArray = "NDArray"
+EOFError = "EOFError"
+
+[type.py.extend-words]
+
+[type.cpp]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.cpp.extend-identifiers]
+countr_one = "countr_one"
+
+[type.cpp.extend-words]
+
+[type.rust]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.rust.extend-identifiers]
+flate2 = "flate2"
+
+[type.rust.extend-words]
+ser = "ser"
+
+[type.lock]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.lock.extend-identifiers]
+
+[type.lock.extend-words]
+
+[type.jl]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.jl.extend-identifiers]
+
+[type.jl.extend-words]
+modul = "modul"
+egals = "egals"
+usig = "usig"
+egal = "egal"
+
+[type.go]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.go.extend-identifiers]
+flate = "flate"
+
+[type.go.extend-words]
+
+[type.css]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.css.extend-identifiers]
+nd = "nd"
+
+[type.css.extend-words]
+
+[type.man]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.man.extend-identifiers]
+Nd = "Nd"
+
+[type.man.extend-words]
+
+[type.cert]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.cert.extend-identifiers]
+
+[type.cert.extend-words]
+
+[type.sh]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.sh.extend-identifiers]
+stap = "stap"
+ot = "ot"
+
+[type.sh.extend-words]
+
+[type.vimscript]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[type.vimscript.extend-identifiers]
+windo = "windo"
+
+[type.vimscript.extend-words]
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1550,10 +1550,10 @@ def moe_wna16_gemm(input: torch.Tensor, output: torch.Tensor,


 def topk_softmax(topk_weights: torch.Tensor, topk_ids: torch.Tensor,
-                 token_expert_indicies: torch.Tensor,
+                 token_expert_indices: torch.Tensor,
                 gating_output: torch.Tensor) -> None:
-    torch.ops._moe_C.topk_softmax(topk_weights, topk_ids,
-                                  token_expert_indicies, gating_output)
+    torch.ops._moe_C.topk_softmax(topk_weights, topk_ids, token_expert_indices,
+                                  gating_output)


 def moe_wna16_marlin_gemm(input: torch.Tensor, output: Optional[torch.Tensor],

--- a/vllm/attention/backends/utils.py
+++ b/vllm/attention/backends/utils.py
@@ -373,7 +373,7 @@ class CommonAttentionState(AttentionState):
                f"Expected attn_backend name to be either 'XFORMERS'," \
                f"'ROCM_FLASH', or 'FLASH_ATTN', but " \
                f"got '{self.runner.attn_backend.get_name()}'"
-            self._add_additonal_input_buffers_for_enc_dec_model(
+            self._add_additional_input_buffers_for_enc_dec_model(
                attn_metadata=attn_metadata, input_buffers=input_buffers)
        return input_buffers

@@ -427,7 +427,7 @@ class CommonAttentionState(AttentionState):
        attn_metadata.max_encoder_seq_len = self.runner.max_seq_len_to_capture
        attn_metadata.num_encoder_tokens = 0

-    def _add_additonal_input_buffers_for_enc_dec_model(
+    def _add_additional_input_buffers_for_enc_dec_model(
            self, attn_metadata, input_buffers: Dict[str, Any]):
        """
        Saves additional input buffers specific to the encoder-decoder model

--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -40,7 +40,7 @@ class Internlm2ToolParser(ToolParser):
            request.skip_special_tokens = False
        return request

-    def get_argments(self, obj):
+    def get_arguments(self, obj):
        if "parameters" in obj:
            return obj.get("parameters")
        elif "arguments" in obj:
@@ -119,9 +119,9 @@ class Internlm2ToolParser(ToolParser):
            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
-                prev_arguments = self.get_argments(
+                prev_arguments = self.get_arguments(
                    self.prev_tool_call_arr[self.current_tool_id])
-                cur_arguments = self.get_argments(tool_call_arr)
+                cur_arguments = self.get_arguments(tool_call_arr)

                # no arguments generated
                if not cur_arguments and not prev_arguments:
@@ -170,7 +170,7 @@ class Internlm2ToolParser(ToolParser):
            # check to see if the name is defined and has been sent. if so,
            # stream the name - otherwise keep waiting
            # finish by setting old and returning None as base case
-            tool_call_arr["arguments"] = self.get_argments(tool_call_arr)
+            tool_call_arr["arguments"] = self.get_arguments(tool_call_arr)
            self.prev_tool_call_arr = [tool_call_arr]
            return delta
        except Exception:

--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -1202,7 +1202,7 @@ class LinearScalingRotaryEmbeddingWithLoRA(BaseLayerWithLoRA):
    multiple LoRA adapters with a specialized kernel.

    Replace LinearScalingRotaryEmbedding with MultiLinearScalingRotaryEmbedding
-    which can handle multi lora adapters in a specialied kernel.
+    which can handle multi lora adapters in a specialized kernel.
    """

    def __init__(self, base_layer: RotaryEmbedding) -> None:

--- a/vllm/lora/punica_wrapper/utils.py
+++ b/vllm/lora/punica_wrapper/utils.py
@@ -68,11 +68,11 @@ def convert_mapping(
                LoRA indices.
            sampler_indices: Tensor of shape [batch_size] mapping requests to
                LoRA indices for sampler. For generation, this will be the
-                same as base_indicies. For prefill, this will map requests
+                same as base_indices. For prefill, this will map requests
                to LoRA indices.
            sampler_indices_padded: Tensor of shape [batch_size] mapping
                requests to LoRA indices for sampler with padding.
-                Same as sampler_indicies, but -1 is replaced with
+                Same as sampler_indices, but -1 is replaced with
                max_loras.
            embeddings_indices: Tensor of shape [2, batch_size] mapping
                requests to embedding indices. First row is for embeddings

--- a/vllm/model_executor/layers/mamba/mamba_mixer2.py
+++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py
@@ -319,7 +319,7 @@ class MambaMixer2(CustomOp):
            n_groups == 1,  # if there was only one group
        )
        intermediate_settings = (intermediate_size, 0, False)
-        head_setings = (self.num_heads, 0, False)
+        head_settings = (self.num_heads, 0, False)

        # - the weight already has a "weight_loader" attribute
        #   which set_weight_attrs will raise if we do not
@@ -372,7 +372,7 @@ class MambaMixer2(CustomOp):
                            intermediate_settings,
                            group_shard_settings,
                            group_shard_settings,
-                            head_setings,  # for dt
+                            head_settings,  # for dt
                        ],
                        self.tp_size,
                        tp_rank,

--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
@@ -516,7 +516,7 @@ def _chunk_state_varlen_kernel(
                offs_n[None, :] * stride_chunk_states_dstate)
        else:

-            # - this seems repetitve, buts its to help the compiler
+            # - this seems repetitive, but it's to help the compiler
            if start_idx < pid_c * chunk_size:
                past_states_ptrs = chunk_states_ptr + (
                    offs_m[:, None] * stride_chunk_states_hdim +