[CI] update typos config for CI pre-commit and fix some spells (#20919)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>

[CI] update typos config for CI pre-commit and fix some spells (#20919)
Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
1eb2b9c1 · Peter Pan · GitHub · 6ebf3137 · 1eb2b9c1 · 1eb2b9c1
Unverified Commit 1eb2b9c1 authored Jul 16, 2025 by Peter Pan Committed by GitHub Jul 15, 2025
19 changed files
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
  - id: ruff-format
    files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
-  rev: v1.32.0
+  rev: v1.34.0
  hooks:
  - id: typos
 - repo: https://github.com/PyCQA/isort

--- a/csrc/cpu/sgl-kernels/common.h
+++ b/csrc/cpu/sgl-kernels/common.h
@@ -58,7 +58,7 @@ namespace {
 #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_LAST_DIM_CONTIGUOUS(x) \
-  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimention")
+  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimension")
 #define CHECK_INPUT(x) \
  CHECK_CPU(x);        \

--- a/csrc/cpu/sgl-kernels/gemm.h
+++ b/csrc/cpu/sgl-kernels/gemm.h
@@ -126,7 +126,7 @@ void fused_experts_int4_w4a16_kernel_impl(
    int64_t topk,
    int64_t num_tokens_post_pad);
-// shared expert implememntation for int8 w8a8
+// shared expert implementation for int8 w8a8
 template <typename scalar_t>
 void shared_expert_int8_kernel_impl(
    scalar_t* __restrict__ output,

--- a/csrc/cpu/sgl-kernels/gemm_int8.cpp
+++ b/csrc/cpu/sgl-kernels/gemm_int8.cpp
@@ -41,7 +41,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
    __m512  vd0;
    __m512  vd1[COLS];
-    // oops! 4x4 spills but luckly we use 4x2
+    // oops! 4x4 spills but luckily we use 4x2
    __m512 vbias[COLS];
    // [NOTE]: s8s8 igemm compensation in avx512-vnni

--- a/csrc/cpu/sgl-kernels/vec.h
+++ b/csrc/cpu/sgl-kernels/vec.h
@@ -37,7 +37,7 @@ inline Vectorized<at::BFloat16> convert_from_float_ext<at::BFloat16>(const Vecto
 #define CVT_FP16_TO_FP32(a) \
    _mm512_cvtps_ph(a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))
-// this doesn't hanel NaN.
+// this doesn't handle NaN.
 inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {
  const __m512i x = _mm512_cvtepu8_epi16(fp8_vec);

--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -63,7 +63,7 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 ARG PIP_KEYRING_PROVIDER=disabled
 ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
-# Flag enables build-in KV-connector dependency libs into docker images
+# Flag enables built-in KV-connector dependency libs into docker images
 ARG INSTALL_KV_CONNECTORS=false
 #################### BASE BUILD IMAGE ####################

--- a/docs/usage/v1_guide.md
+++ b/docs/usage/v1_guide.md
@@ -106,7 +106,7 @@ to enable simultaneous generation and embedding using the same engine instance i
 Models using selective state-space mechanisms instead of standard transformer attention are partially supported.
 Models that use Mamba-2 layers (e.g., `Mamba2ForCausalLM`) are supported, but models that use older Mamba-1 layers
-(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet suported. Please note that these models currently require
+(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet supported. Please note that these models currently require
 enforcing eager mode and disabling prefix caching in V1.
 Models that combine Mamba-2 layers with standard attention layers are also supported (e.g., `BambaForCausalLM`,

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -174,3 +174,186 @@ respect-ignore-files = true
 [tool.ty.environment]
 python = "./.venv"
+[tool.typos.files]
+# these files may contain non-English words
+extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "vllm/third_party/*"]
+ignore-hidden = true
+ignore-files = true
+ignore-dot = true
+ignore-vcs = true
+ignore-global = true
+ignore-parent = true
+[tool.typos.default]
+binary = false
+check-filename = false
+check-file = true
+unicode = true
+ignore-hex = true
+identifier-leading-digits = false
+locale = "en"
+extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
+    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
+     ".*[Tt]h[rR].*"]
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.default.extend-identifiers]
+bbc5b7ede = "bbc5b7ede"
+womens_doubles = "womens_doubles"
+v_2nd = "v_2nd"
+# splitted_input = "splitted_input"
+NOOPs = "NOOPs"
+typ = "typ"
+nin_shortcut = "nin_shortcut"
+UperNetDecoder = "UperNetDecoder"
+subtile = "subtile"
+cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
+SFOuput = "SFOuput"
+# huggingface transformers repo uses these words
+depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
+DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
+depthwise_seperable_CNN = "depthwise_seperable_CNN"
+[tool.typos.default.extend-words]
+iy = "iy"
+tendencias = "tendencias"
+# intel cpu features
+tme = "tme"
+dout = "dout"
+Pn = "Pn"
+arange = "arange"
+[tool.typos.type.py]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.py.extend-identifiers]
+arange = "arange"
+NDArray = "NDArray"
+EOFError = "EOFError"
+fo = "fo"
+ba = "ba"
+[tool.typos.type.py.extend-words]
+[tool.typos.type.cpp]
+extend-glob = ["*.cu"]
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.cpp.extend-identifiers]
+countr_one = "countr_one"
+k_ot = "k_ot"
+ot = "ot"
+[tool.typos.type.cpp.extend-words]
+[tool.typos.type.rust]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.rust.extend-identifiers]
+flate2 = "flate2"
+[tool.typos.type.rust.extend-words]
+ser = "ser"
+[tool.typos.type.lock]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.lock.extend-identifiers]
+[tool.typos.type.lock.extend-words]
+[tool.typos.type.jl]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.jl.extend-identifiers]
+[tool.typos.type.jl.extend-words]
+modul = "modul"
+egals = "egals"
+usig = "usig"
+egal = "egal"
+[tool.typos.type.go]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.go.extend-identifiers]
+flate = "flate"
+[tool.typos.type.go.extend-words]
+[tool.typos.type.css]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.css.extend-identifiers]
+nd = "nd"
+[tool.typos.type.css.extend-words]
+[tool.typos.type.man]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.man.extend-identifiers]
+Nd = "Nd"
+[tool.typos.type.man.extend-words]
+[tool.typos.type.cert]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.cert.extend-identifiers]
+[tool.typos.type.cert.extend-words]
+[tool.typos.type.sh]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.sh.extend-identifiers]
+ot = "ot"
+[tool.typos.type.sh.extend-words]
+[tool.typos.type.vimscript]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+[tool.typos.type.vimscript.extend-identifiers]
+windo = "windo"
+[tool.typos.type.vimscript.extend-words]
--- a/tests/kernels/moe/modular_kernel_tools/common.py
+++ b/tests/kernels/moe/modular_kernel_tools/common.py
@@ -416,7 +416,7 @@ class RankTensors:
        # We dequant and use that as hidden_states so the tests are stable.
        # quantizing and dequantizing yield slightly different results
        # depending on the hardware. Here we quantize and dequantize
-        # first - so further quantize and dequantize will yeild the same
+        # first - so further quantize and dequantize will yield the same
        # values.
        if config.is_per_tensor_act_quant:
            a_q, a_scales = ops.scaled_fp8_quant(

--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -95,7 +95,7 @@ def run_single_case(m, n, k, topk, num_experts, block_size):
    topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
    topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
-    # triton referrence
+    # triton reference
    out_triton = fused_experts(
        hidden_states=tokens_bf16,
        w1=w1,

--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -43,7 +43,7 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
        text_config = hf_config.get_text_config()
        # Ensure at least 2 experts per group
-        # Since `grouped_topk` assums top-2
+        # Since `grouped_topk` assumes top-2
        n_group = getattr(text_config, 'n_group', None)
        num_experts = n_group * 2 if n_group is not None else 2

--- a/tests/v1/test_external_lb_dp.py
+++ b/tests/v1/test_external_lb_dp.py
@@ -17,7 +17,7 @@ MODEL_NAME = "ibm-research/PowerMoE-3b"
 # Number of data parallel ranks for external LB testing
 DP_SIZE = int(os.getenv("DP_SIZE", "2"))
-# Default tensor parallell size to use
+# Default tensor parallel size to use
 TP_SIZE = int(os.getenv("TP_SIZE", "1"))

--- a/typos.toml
+++ b/typos.toml
-[files]
-# these files may be written in non english words
-extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
-    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
-    "vllm/third_party/*"]
-ignore-hidden = true
-ignore-files = true
-ignore-dot = true
-ignore-vcs = true
-ignore-global = true
-ignore-parent = true
-[default]
-binary = false
-check-filename = false
-check-file = true
-unicode = true
-ignore-hex = true
-identifier-leading-digits = false
-locale = "en"
-extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
-    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
-    ".*ot.*", ".*[Tt]h[rR].*"]
-extend-ignore-words-re = []
-extend-ignore-re = []
-[default.extend-identifiers]
-bbc5b7ede = "bbc5b7ede"
-womens_doubles = "womens_doubles"
-v_2nd = "v_2nd"
-splitted_input = "splitted_input"
-NOOPs = "NOOPs"
-typ = "typ"
-nin_shortcut = "nin_shortcut"
-UperNetDecoder = "UperNetDecoder"
-subtile = "subtile"
-cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
-SFOuput = "SFOuput"
-# huggingface transformers repo uses these words
-depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
-DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
-depthwise_seperable_CNN = "depthwise_seperable_CNN"
-[default.extend-words]
-iy = "iy"
-tendencias = "tendencias"
-# intel cpu features
-tme = "tme"
-dout = "dout"
-Pn = "Pn"
-arange = "arange"
-[type.py]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.py.extend-identifiers]
-arange = "arange"
-NDArray = "NDArray"
-EOFError = "EOFError"
-[type.py.extend-words]
-[type.cpp]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.cpp.extend-identifiers]
-countr_one = "countr_one"
-[type.cpp.extend-words]
-[type.rust]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.rust.extend-identifiers]
-flate2 = "flate2"
-[type.rust.extend-words]
-ser = "ser"
-[type.lock]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.lock.extend-identifiers]
-[type.lock.extend-words]
-[type.jl]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.jl.extend-identifiers]
-[type.jl.extend-words]
-modul = "modul"
-egals = "egals"
-usig = "usig"
-egal = "egal"
-[type.go]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.go.extend-identifiers]
-flate = "flate"
-[type.go.extend-words]
-[type.css]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.css.extend-identifiers]
-nd = "nd"
-[type.css.extend-words]
-[type.man]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.man.extend-identifiers]
-Nd = "Nd"
-[type.man.extend-words]
-[type.cert]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.cert.extend-identifiers]
-[type.cert.extend-words]
-[type.sh]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.sh.extend-identifiers]
-stap = "stap"
-ot = "ot"
-[type.sh.extend-words]
-[type.vimscript]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-[type.vimscript.extend-identifiers]
-windo = "windo"
-[type.vimscript.extend-words]
--- a/vllm/attention/backends/differential_flash_attn.py
+++ b/vllm/attention/backends/differential_flash_attn.py
@@ -961,7 +961,7 @@ class DifferentialFlashAttentionImpl(AttentionImpl):
                                    "... H (two D) -> ... (H two) D",
                                    two=2)
-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
            q = q.view(-1, self.num_heads, self.head_size)
            q1, q2 = self.split_heads(q)
            # kv_cache shape is (2, num_blocks, block_size, num_kv_heads, head_size) # noqa: E501

--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -372,7 +372,7 @@ class OpenAIServingResponses(OpenAIServing):
                        })
        # Append the new input.
-        # Reponses API supports simple text inputs without chat format.
+        # Responses API supports simple text inputs without chat format.
        if isinstance(request.input, str):
            messages.append({"role": "user", "content": request.input})
        else:

--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1172,7 +1172,7 @@ def fused_experts(
        allow_cutlass_block_scaled_grouped_gemm: bool = False) -> torch.Tensor:
    # For now, disable DeepGemm for small N (<= 512) until better
    # permute/unpermute ops are available.
-    # However, on B200, we use DeepGemm for all cases becuase they only support
+    # However, on B200, we use DeepGemm for all cases because they only support
    # E8M0 scale, which means we requantize the weight and input to the specific
    # scale. Falling back to cutlass or triton for some cases would cause
    # accuracy issues.

--- a/vllm/model_executor/models/phi4flash.py
+++ b/vllm/model_executor/models/phi4flash.py
@@ -193,7 +193,7 @@ class SambaYAttention(nn.Module):
            ],
                                dim=-1)
            attn_output = self.attn(q, k, v)
-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
            q = self.Wqkv(hidden_states)
            attn_output = self.attn(q, None, None)
        attn_output = attn_output.view(-1, self.num_heads * self.head_dim)

--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -394,7 +394,7 @@ def use_cudnn_prefill() -> bool:
 # Currently 394MB, this can be tuned based on GEMM sizes used.
-# Choosen to be the same as sglang:
+# Chosen to be the same as sglang:
 #  https://github.com/sgl-project/sglang/blob/766392c6bda2558b61ce6d1c1bfd8081a549e1f1/python/sglang/global_config.py#L37
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 394 * 1024 * 1024

--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -969,7 +969,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
        else:
            mm_embeds = []
        xm.mark_step()
-        # Prepare inputs, the requests might be splitted into multiple
+        # Prepare inputs, the requests might be split into multiple
        # executions, combine the result of each execution.
        start_index = 0
        combined_selected_tokens: list[torch.Tensor] = []