[CI] change spell checker from codespell to typos (#18711)

Signed-off-by: Andy Xie <andy.xning@gmail.com>

[CI] change spell checker from codespell to typos (#18711)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
2f1c19b2 · Ning Xie · GitHub · 42f52cc9 · 2f1c19b2 · 2f1c19b2
Unverified commit 2f1c19b2 · authored Jun 12, 2025 by Ning Xie · committed by GitHub Jun 11, 2025
17 changed files
--- a/vllm/model_executor/layers/quantization/utils/int8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/int8_utils.py
@@ -219,7 +219,7 @@ def per_token_group_quant_int8(
    quantized tensor along with the scaling factor used for quantization.
    Args:
-        x: The input tenosr with ndim >= 2.
+        x: The input tensor with ndim >= 2.
        group_size: The group size used for quantization.
        eps: The minimum to avoid dividing zero.
-        dtype: The dype of output tensor. Note that only `torch.int8`
+        dtype: The dtype of output tensor. Note that only `torch.int8`

--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -401,7 +401,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                        self.target_modules.append(
                            name.replace(rep_name, sub_name))
                # Add original module name even if the module has stacked map,
-                # in case model has a mixture of disk-merged and disk-splitted
+                # in case model has a mixture of disk-merged and disk-split
                # weights with same last name.
                self.target_modules.append(name)

--- a/vllm/model_executor/models/baichuan.py
+++ b/vllm/model_executor/models/baichuan.py
@@ -131,7 +131,7 @@ class BaiChuanAttention(nn.Module):
        self.num_heads = (self.total_num_heads //
                          tensor_model_parallel_world_size)
        self.head_dim = hidden_size // self.total_num_heads
-        self.postion_embedding = position_embedding
+        self.position_embedding = position_embedding
        self.rope_theta = rope_theta
        self.max_position_embeddings = max_position_embeddings
@@ -151,7 +151,7 @@ class BaiChuanAttention(nn.Module):
            quant_config=quant_config,
        )
        # Create the alibi slopes and slice them.
-        if self.postion_embedding == "ALIBI":
+        if self.position_embedding == "ALIBI":
            tp_rank = get_tensor_model_parallel_rank()
            head_start = tp_rank * self.num_heads
            head_end = (tp_rank + 1) * self.num_heads
@@ -187,7 +187,7 @@ class BaiChuanAttention(nn.Module):
    ) -> torch.Tensor:
        qkv, _ = self.W_pack(hidden_states)
        q, k, v = qkv.chunk(chunks=3, dim=-1)
-        if self.postion_embedding != "ALIBI":
+        if self.position_embedding != "ALIBI":
            q, k = self.rotary_emb(positions, q, k)
        attn_output = self.attn(q, k, v)
        output, _ = self.o_proj(attn_output)

--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -344,7 +344,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
            self.image_newline = nn.Parameter(
                torch.randn(self.projector_config.n_embed) * embed_std)
            # This is a typo in original implementation
-            self.view_seperator = nn.Parameter(
+            self.view_separator = nn.Parameter(
                torch.randn(self.projector_config.n_embed) * embed_std)
        else:
            raise ValueError(
@@ -549,13 +549,13 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
            if self.global_view_pos == "head":
                global_local_features = torch.cat([
                    global_features,
-                    self.view_seperator[None, :],
+                    self.view_separator[None, :],
                    local_features,
                ])
            else:
                global_local_features = torch.cat([
                    local_features,
-                    self.view_seperator[None, :],
+                    self.view_separator[None, :],
                    global_features,
                ])

--- a/vllm/model_executor/models/eagle.py
+++ b/vllm/model_executor/models/eagle.py
@@ -197,7 +197,7 @@ class EAGLE(nn.Module):
        return logits
    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
-        # This implementation is incompitable with https://huggingface.co/yuhuili/EAGLE-LLaMA3-Instruct-8B
+        # This implementation is incompatible with https://huggingface.co/yuhuili/EAGLE-LLaMA3-Instruct-8B
        # due to missing lm_head weights and its config being that of a
        # Llama model. Here's a compatible version with the same weights:
        # https://huggingface.co/abhigoyal/EAGLE-LLaMA3-Instruct-8B-vllm

--- a/vllm/model_executor/models/gemma3_mm.py
+++ b/vllm/model_executor/models/gemma3_mm.py
@@ -634,13 +634,13 @@ class Gemma3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
        kwargs["has_images"] = True
        # NOTE(woosuk): Here, we distinguish the sequences by the position id 0.
        # This is a HACK. Fix this.
-        start_idices = (positions == 0).cpu().nonzero()
+        start_indices = (positions == 0).cpu().nonzero()
-        num_seqs = len(start_idices)
+        num_seqs = len(start_indices)
        seq_lens = []
        for i in range(num_seqs):
-            start_idx = start_idices[i].item()
+            start_idx = start_indices[i].item()
            if i < num_seqs - 1:
-                end_idx = start_idices[i + 1].item()
+                end_idx = start_indices[i + 1].item()
            else:
                end_idx = len(input_ids)
            seq_lens.append(end_idx - start_idx)

--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -52,7 +52,7 @@ class Llama4MoE(nn.Module):
        renormalize: bool,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        router_scores, router_indices = fast_topk(gating_output, topk, dim=-1)
-        # psuedo-standard is that the router scores are floats
+        # pseudo-standard is that the router scores are floats
        router_scores = torch.sigmoid(router_scores.float())
        return (router_scores, router_indices.to(torch.int32))

--- a/vllm/model_executor/models/mixtral_quant.py
+++ b/vllm/model_executor/models/mixtral_quant.py
@@ -114,9 +114,9 @@ class MixtralMoE(nn.Module):
                f"Tensor parallel size {self.tp_size} is greater than "
                f"the number of experts {self.num_total_experts}.")
        # Split experts equally between ranks
-        self.expert_indicies = np.array_split(range(
+        self.expert_indices = np.array_split(range(self.num_total_experts),
-            self.num_total_experts), self.tp_size)[self.rank].tolist()
+                                             self.tp_size)[self.rank].tolist()
-        if not self.expert_indicies:
+        if not self.expert_indices:
            raise ValueError(
                f"Rank {self.rank} has no experts assigned to it.")
@@ -125,7 +125,7 @@ class MixtralMoE(nn.Module):
                       config.hidden_size,
                       config.intermediate_size,
                       quant_config=quant_config)
-            if idx in self.expert_indicies else None
+            if idx in self.expert_indices else None
            for idx in range(self.num_total_experts)
        ])
        self.gate = ReplicatedLinear(config.hidden_size,
@@ -146,7 +146,7 @@ class MixtralMoE(nn.Module):
        routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
        final_hidden_states = None
-        for expert_idx in self.expert_indicies:
+        for expert_idx in self.expert_indices:
            expert_layer = self.experts[expert_idx]
            expert_mask = (selected_experts == expert_idx)
            expert_weights = (routing_weights * expert_mask).sum(dim=-1,

--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -283,7 +283,7 @@ class OvisProcessingInfo(BaseProcessingInfo):
    def get_image_size_with_most_features(self) -> ImageSize:
        height, width = self.get_hf_processor().get_image_size()
        hs = self.get_hf_config().visual_tokenizer_config.hidden_stride
-        # NOTE(Isotr0py): 9 is `max_partion` hardcoded in original code
+        # NOTE(Isotr0py): 9 is `max_partition` hardcoded in original code
        # https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/modeling_ovis.py#L96
        return ImageSize(width=width * hs * 9, height=height * hs * 9)

--- a/vllm/model_executor/models/phi3_small.py
+++ b/vllm/model_executor/models/phi3_small.py
@@ -145,7 +145,7 @@ class Phi3SmallSelfAttention(nn.Module):
        self.num_q_per_kv = self.num_heads // self.num_key_value_heads
        if self.tp_size > 1:
            assert self.num_key_value_heads % self.tp_size == 0
-        self.num_kv_heads_per_partion = max(
+        self.num_kv_heads_per_partition = max(
            1, self.num_key_value_heads // self.tp_size)
        self.num_heads_per_partition = self.num_heads // self.tp_size
@@ -212,7 +212,7 @@ class Phi3SmallSelfAttention(nn.Module):
            bs_params = {
                'max_seqlen': self.max_position_embeddings,
                'num_heads': self.num_heads_per_partition,
-                "num_kv_heads": self.num_kv_heads_per_partion,
+                "num_kv_heads": self.num_kv_heads_per_partition,
                "block_size": self.sparse_block_size,
                "local_blocks": self.local_blocks,
                "vert_stride": self.vert_stride,
@@ -222,7 +222,7 @@ class Phi3SmallSelfAttention(nn.Module):
        self.attn = Attention(self.num_heads_per_partition,
                              self.head_dim,
                              self.scale,
-                              num_kv_heads=self.num_kv_heads_per_partion,
+                              num_kv_heads=self.num_kv_heads_per_partition,
                              cache_config=cache_config,
                              quant_config=quant_config,
                              blocksparse_params=bs_params,
@@ -243,8 +243,8 @@ class Phi3SmallSelfAttention(nn.Module):
        # NOTE: this is required by RotaryEmbed, which indeed does not have to
        # TODO: allow 3D QK for rotary forward
        q = q.reshape(-1, self.head_dim * self.num_heads_per_partition)
-        k = k.reshape(-1, self.head_dim * self.num_kv_heads_per_partion)
+        k = k.reshape(-1, self.head_dim * self.num_kv_heads_per_partition)
-        v = v.reshape(-1, self.head_dim * self.num_kv_heads_per_partion)
+        v = v.reshape(-1, self.head_dim * self.num_kv_heads_per_partition)
        q, k = self.rotary_emb(positions, q, k)
        attn_output = self.attn(q, k, v)

--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -126,7 +126,7 @@ class ConformerEncoderLayer(nn.Module):
            (Multi-Head Attention),
            1 = typical Multi-Head Attention,
            1 < attn_group_sizes < attention_heads = Grouped-Query Attention
-            attn_group_sizes = attenion_heads = Multi-Query Attention
+            attn_group_sizes = attention_heads = Multi-Query Attention
    """
    def __init__(
@@ -318,7 +318,7 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
            1 = typical Multi-Head Attention,
            1 < attention_group_size < attention_heads = Grouped-Query 
            Attention
-            attention_group_size = attenion_heads = Multi-Query Attention
+            attention_group_size = attention_heads = Multi-Query Attention
    """
    def __init__(
@@ -744,7 +744,7 @@ class ConformerEncoder(TransformerEncoderBase):
            1 = typical Multi-Head Attention,
            1 < attention_group_size < attention_heads = Grouped-Query
            Attention
-            attention_group_size = attenion_heads = Multi-Query Attention
+            attention_group_size = attention_heads = Multi-Query Attention
    """
    extra_multi_layer_output_idxs: list[int]

--- a/vllm/model_executor/models/phimoe.py
+++ b/vllm/model_executor/models/phimoe.py
@@ -147,15 +147,15 @@ class mp(torch.autograd.Function):
        grad_at_output = grad_at_output * multiplier
-        grad_at_scores_expaned = masked_gates * grad_at_output.mul(-1)
+        grad_at_scores_expanded = masked_gates * grad_at_output.mul(-1)
-        grad_at_scores_expaned.scatter_add_(
+        grad_at_scores_expanded.scatter_add_(
            dim=-1,
            index=selected_experts,
            src=grad_at_output,
        )
        return (
-            grad_at_scores_expaned,
+            grad_at_scores_expanded,
            None,
            None,
            None,

--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -324,7 +324,7 @@ def merge_and_sort_multimodal_metadata(
    Returns:
        list[str]: List of item modalities in order of their positions in the
        input sequence.
-        list[PlaceholderRange]: Sorted list of all PlaceholdeRanges from
+        list[PlaceholderRange]: Sorted list of all PlaceholderRanges from
        mm_positions.
        Optional[list[str]]: Sorted list of all hashes from mm_hashes if given,
        None otherwise.

--- a/vllm/transformers_utils/processors/ovis.py
+++ b/vllm/transformers_utils/processors/ovis.py
@@ -68,7 +68,7 @@ class OvisProcessor(ProcessorMixin):
    """
    attributes = ["image_processor", "tokenizer"]
-    valid_kwargs = ["chat_template", "image_pad_token", "image_segement_len"]
+    valid_kwargs = ["chat_template", "image_pad_token", "image_segment_len"]
    image_processor_class = "AutoImageProcessor"
    tokenizer_class = "AutoTokenizer"

--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -886,7 +886,7 @@ class HPUModelRunnerBase(ModelRunnerBase[TModelInputForHPU]):
            num_decode_tokens=0,
            slot_mapping=slot_mapping,
            multi_modal_placeholder_index_maps=
-            None,  # FIXME(kzawora): mutli-modality will not work here
+            None,  # FIXME(kzawora): multi-modality will not work here
            enable_kv_scales_calculation=False,
        )
        multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)

--- a/vllm/worker/multi_step_model_runner.py
+++ b/vllm/worker/multi_step_model_runner.py
@@ -277,7 +277,7 @@ class StatefulModelInput(BroadcastableModelInput):
        assert fmi.input_tokens.shape[0] >= self.num_seqs
        fmi_new_input_tokens: torch.Tensor = fmi.input_tokens[:self.num_seqs]
-        # Update frozen_model_input::input_positons.
+        # Update frozen_model_input::input_positions.
        assert fmi.input_positions is not None
        assert fmi.input_positions.shape[0] >= self.num_seqs
        fmi_new_input_positions: torch.Tensor = fmi.input_positions[:self.

--- a/vllm/worker/tpu_model_runner.py
+++ b/vllm/worker/tpu_model_runner.py
@@ -798,9 +798,9 @@ class ModelWrapper(nn.Module):
        """
        batch_size, seq_len = token_ids.shape
        # Calculate the positions to sample from.
-        start_indicies = torch.arange(
+        start_indices = torch.arange(
            batch_size, dtype=torch.int32, device=input_lens.device) * seq_len
-        logits_indices = start_indicies + input_lens - 1
+        logits_indices = start_indices + input_lens - 1
        attn_metadata = get_forward_context().attn_metadata
        # FIXME(woosuk): This is a temporary hack to avoid using the existing
@@ -822,14 +822,14 @@ class ModelWrapper(nn.Module):
            num_kv_heads, num_blocks, block_size, _ = kv_caches[0][0].shape
            slot_mapping = attn_metadata.slot_mapping
            slot_mapping = slot_mapping.flatten()
-            head_indicies = torch.arange(0,
+            head_indices = torch.arange(0,
                                        num_kv_heads,
                                        device=slot_mapping.device,
                                        dtype=slot_mapping.dtype)
-            head_indicies *= block_size * num_blocks
+            head_indices *= block_size * num_blocks
            slot_mapping = slot_mapping.repeat_interleave(num_kv_heads).view(
                -1, num_kv_heads)
-            slot_mapping = slot_mapping + head_indicies.view(1, -1)
+            slot_mapping = slot_mapping + head_indices.view(1, -1)
            slot_mapping = slot_mapping.flatten()
            attn_metadata.slot_mapping = slot_mapping