[Bugfix][CI] fix typos (#34934)

Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[Bugfix][CI] fix typos (#34934)
Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6a895197 · Jiayi Yan · GitHub · 8c760b6a · 6a895197 · 6a895197
Unverified commit 6a895197, authored Mar 06, 2026 by Jiayi Yan; committed by GitHub Mar 05, 2026
20 changed files
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -1502,10 +1502,10 @@ class RowParallelLinear(LinearBase):
        if self.input_is_parallel:
            input_parallel = input_
        else:
-            splitted_input = split_tensor_along_last_dim(
+            split_input = split_tensor_along_last_dim(
                input_, num_partitions=self.tp_size
            )
-            input_parallel = splitted_input[self.tp_rank].contiguous()
+            input_parallel = split_input[self.tp_rank].contiguous()

        # Matrix multiply.
        assert self.quant_method is not None

--- a/vllm/model_executor/layers/mla.py
+++ b/vllm/model_executor/layers/mla.py
@@ -35,7 +35,7 @@ class MultiHeadLatentAttentionWrapper(PluggableLayer):
    """Pluggable MLA layer which allows OOT backends to add
    custom implementations of the outer MLA layer (including rope & o_proj).
    Note that currently oot platforms can still use CustomOp.register_oot to
-    replace MLA layer entirly, although we use PluggableLayer to register
+    replace MLA layer entirely, although we use PluggableLayer to register
    this layer now.

    This class takes positions and hidden_states as input.

--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -191,7 +191,7 @@ class CompressedTensorsConfig(QuantizationConfig):
        """
        Helper function to update target_scheme_map
        since linear layers get fused into FusedMoE
-        targetting 'Linear' needs to also match
+        targeting 'Linear' needs to also match
        FusedMoE modules.
        """
        if (

--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -2445,7 +2445,7 @@ class CompressedTensorsW4A8Fp8MoEMethod(CompressedTensorsMoEMethod):
            w2_scale=layer.w2_weight_scale,  # group scale
            g1_alphas=layer.w13_weight_chan_scale,
            g2_alphas=layer.w2_weight_chan_scale,
-            per_act_token_quant=True,  # always use dynamc per-token
+            per_act_token_quant=True,  # always use dynamic per-token
            per_out_ch_quant=True,  # always use per-channel
        )


--- a/vllm/model_executor/layers/quantization/cpu_wna16.py
+++ b/vllm/model_executor/layers/quantization/cpu_wna16.py
@@ -261,7 +261,7 @@ class CPUAWQLinearMethod(LinearMethodBase):

        zeros = pack_cols(zeros, bits, group_num, output_size).contiguous()
        # make 16 output channel as a block and transpose to
-        # the make the block contigous
+        # make the block contiguous
        weight = pack_cols(weight, bits, input_size, output_size)
        weight = (
            weight.view(input_size, -1, 16 // pack_factor)

--- a/vllm/model_executor/layers/quantization/torchao.py
+++ b/vllm/model_executor/layers/quantization/torchao.py
@@ -199,7 +199,7 @@ class TorchAOConfig(QuantizationConfig):

    @classmethod
    def from_config_dict_json(cls, config_dict_json: str) -> "TorchAOConfig":
-        """Iniitalize class from a config_dict json string, got from
+        """Initialize class from a config_dict json string, got from
        torchao_config_object = some AOBaseConfig object
        json.dumps(config_to_dict(torchao_config_object))
        """

--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -255,7 +255,7 @@ def _flashinfer_fp8_blockscale_gemm_impl(

    This batch-size-dependent selection is essential for maintaining model accuracy.
    Benchmarks on GSM8K show a significant accuracy gap (88% vs 95%) for DeepSeek-V3.1
-    when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accurracy
+    when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accuracy
    drop.

    Args:

--- a/vllm/model_executor/layers/quantization/utils/machete_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/machete_utils.py
@@ -39,7 +39,7 @@ def query_machete_supported_group_sizes(act_type: torch.dtype) -> list[int]:


 def check_machete_supports_shape(
-    in_features: int, out_featrues: int
+    in_features: int, out_features: int
 ) -> tuple[bool, str | None]:
    if in_features % MACHETE_PREPACKED_BLOCK_SHAPE[0] != 0:
        return (
@@ -47,7 +47,7 @@ def check_machete_supports_shape(
            "Input features size must be divisible by "
            f"{MACHETE_PREPACKED_BLOCK_SHAPE[0]}",
        )
-    if out_featrues % MACHETE_PREPACKED_BLOCK_SHAPE[1] != 0:
+    if out_features % MACHETE_PREPACKED_BLOCK_SHAPE[1] != 0:
        return (
            False,
            "Output features size must be divisible by "

--- a/vllm/model_executor/layers/rotary_embedding/common.py
+++ b/vllm/model_executor/layers/rotary_embedding/common.py
@@ -237,7 +237,7 @@ class ApplyRotaryEmb(CustomOp):
        Arguments of apply_rotary_emb() in vllm_flash_attn:
            x: [batch_size, seq_len, nheads, headdim]
            cos, sin: [seqlen_rotary, rotary_dim / 2]
-            interleaved: defalut as False (Neox-style).
+            interleaved: default as False (Neox-style).
            ...
        """
        interleaved = not self.is_neox_style
@@ -259,7 +259,7 @@ class ApplyRotaryEmb(CustomOp):
            Arguments of apply_rotary() in flash_attn:
                x: [batch_size, seq_len, nheads, headdim]
                cos, sin: [seqlen_rotary, rotary_dim / 2]
-                interleaved: defalut as False (Neox-style).
+                interleaved: default as False (Neox-style).
                ...
            """
            interleaved = not self.is_neox_style

--- a/vllm/model_executor/models/ernie45_vl_moe.py
+++ b/vllm/model_executor/models/ernie45_vl_moe.py
@@ -342,7 +342,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
            visual_token_mask = visual_token_mask.repeat(1, self.hidden_size).bool()
            text_token_mask = ~visual_token_mask
            final_experts_hidden_states = torch.zeros_like(hidden_states)
-            final_shared_ouput = (
+            final_shared_output = (
                torch.zeros_like(hidden_states) if self.has_shared_experts else None
            )

@@ -356,26 +356,26 @@ class Ernie4_5_VLMoeMoE(nn.Module):
            text_router_logits, _ = self.text_experts_gate(
                text_hidden_states.to(dtype=torch.float32)
            )
-            text_shared_ouput, text_experts_output = self.text_experts(
+            text_shared_output, text_experts_output = self.text_experts(
                hidden_states=text_hidden_states, router_logits=text_router_logits
            )
            final_experts_hidden_states[text_token_mask] = text_experts_output.flatten()
            if self.has_shared_experts:
-                final_shared_ouput[text_token_mask] = text_shared_ouput.flatten()
+                final_shared_output[text_token_mask] = text_shared_output.flatten()

            vision_router_logits, _ = self.vision_experts_gate(
                vision_hidden_states.to(dtype=torch.float32)
            )
-            vision_shared_ouput, vision_experts_output = self.vision_experts(
+            vision_shared_output, vision_experts_output = self.vision_experts(
                hidden_states=vision_hidden_states, router_logits=vision_router_logits
            )
            final_experts_hidden_states[visual_token_mask] = (
                vision_experts_output.flatten()
            )
            if self.has_shared_experts:
-                final_shared_ouput[visual_token_mask] = vision_shared_ouput.flatten()
+                final_shared_output[visual_token_mask] = vision_shared_output.flatten()

-            final_hidden_states = (final_shared_ouput, final_experts_hidden_states)
+            final_hidden_states = (final_shared_output, final_experts_hidden_states)
        else:
            # only text modal input
            text_router_logits, _ = self.text_experts_gate(

--- a/vllm/model_executor/models/fireredasr2.py
+++ b/vllm/model_executor/models/fireredasr2.py
@@ -107,7 +107,7 @@ class Conv2dSubsampling(nn.Module):
        )

        self.subsampling = 4
-        left_context = right_context = 3  # both exclude currect frame
+        left_context = right_context = 3  # both exclude current frame
        self.context = left_context + 1 + right_context  # 7

    def forward(

--- a/vllm/model_executor/models/funasr.py
+++ b/vllm/model_executor/models/funasr.py
@@ -115,7 +115,7 @@ class EncoderLayerSANM(nn.Module):
        hidden_states: torch.Tensor,
        mask: torch.Tensor | None = None,
        cache=None,
-        mask_shfit_chunk=None,
+        mask_shift_chunk=None,
        mask_att_chunk_encoder=None,
    ):
        residual = hidden_states
@@ -125,14 +125,14 @@ class EncoderLayerSANM(nn.Module):
            hidden_states = residual + self.self_attn(
                hidden_states,
                mask,
-                mask_shfit_chunk=mask_shfit_chunk,
+                mask_shift_chunk=mask_shift_chunk,
                mask_att_chunk_encoder=mask_att_chunk_encoder,
            )
        else:
            hidden_states = self.self_attn(
                hidden_states,
                mask,
-                mask_shfit_chunk=mask_shfit_chunk,
+                mask_shift_chunk=mask_shift_chunk,
                mask_att_chunk_encoder=mask_att_chunk_encoder,
            )

@@ -140,7 +140,7 @@ class EncoderLayerSANM(nn.Module):
        hidden_states = self.norm2(hidden_states)
        hidden_states = residual + self.feed_forward(hidden_states)

-        return hidden_states, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+        return hidden_states, mask, cache, mask_shift_chunk, mask_att_chunk_encoder


 class MultiHeadedAttentionSANM(nn.Module):
@@ -183,13 +183,13 @@ class MultiHeadedAttentionSANM(nn.Module):
        self,
        inputs: torch.Tensor,
        mask: torch.Tensor,
-        mask_shfit_chunk: torch.Tensor = None,
+        mask_shift_chunk: torch.Tensor = None,
    ):
        b, t, d = inputs.size()
        if mask is not None:
            mask = torch.reshape(mask, (b, -1, 1))
-            if mask_shfit_chunk is not None:
-                mask = mask * mask_shfit_chunk
+            if mask_shift_chunk is not None:
+                mask = mask * mask_shift_chunk
            inputs = inputs * mask

        x = inputs.transpose(1, 2)
@@ -243,11 +243,11 @@ class MultiHeadedAttentionSANM(nn.Module):
        self,
        hidden_states: torch.Tensor,
        mask: torch.Tensor,
-        mask_shfit_chunk: torch.Tensor = None,
+        mask_shift_chunk: torch.Tensor = None,
        mask_att_chunk_encoder: torch.Tensor = None,
    ):
        q_h, k_h, v_h, v = self.forward_qkv(hidden_states)
-        fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
+        fsmn_memory = self.forward_fsmn(v, mask, mask_shift_chunk)
        q_h = q_h * self.d_k ** (-0.5)
        scores = torch.matmul(q_h, k_h.transpose(-2, -1))
        att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)

--- a/vllm/model_executor/models/isaac.py
+++ b/vllm/model_executor/models/isaac.py
@@ -646,7 +646,7 @@ class IsaacImageProcessor:
        return_tensors: str | TensorType | None,
        **kwargs: Unpack[IsaacImageProcessorKwargs],
    ) -> BatchFeature:
-        """Preprocess images into format compatibile with vLLM input processing."""
+        """Preprocess images into format compatible with vLLM input processing."""

        all_pixel_values: list[torch.Tensor] = []
        all_image_grids: list[torch.Tensor] = []

--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -299,7 +299,7 @@ class KeyeVisionEmbeddings(nn.Module):
                )
            (
                batch_size,
-                squence_len,
+                sequence_len,
                channel,
                height,
                width,

--- a/vllm/model_executor/models/longcat_flash.py
+++ b/vllm/model_executor/models/longcat_flash.py
@@ -238,7 +238,7 @@ class LongcatRouter(nn.Module):
        self,
        config: FlashConfig,
        zero_expert_num: int,
-        rounter_params_dtype: torch.dtype,
+        router_params_dtype: torch.dtype,
        prefix: str = "",
    ):
        super().__init__()
@@ -252,12 +252,12 @@ class LongcatRouter(nn.Module):
            config.hidden_size,
            self.n_routed_experts,
            bias=config.router_bias,
-            params_dtype=rounter_params_dtype,
+            params_dtype=router_params_dtype,
            quant_config=None,
            prefix=f"{prefix}.classifier",
        )
        self.e_score_correction_bias = nn.Parameter(
-            torch.zeros((self.n_routed_experts), dtype=rounter_params_dtype)
+            torch.zeros((self.n_routed_experts), dtype=router_params_dtype)
        )

    def forward(self, hidden_states):
@@ -281,14 +281,14 @@ class LongcatMoe(nn.Module):
        super().__init__()
        self.hidden_size = hidden_size
        # Gate always runs at half / full precision for now.
-        self.rounter_params_dtype = params_dtype
+        self.router_params_dtype = params_dtype
        if config.router_dtype == "float32":
-            self.rounter_params_dtype = torch.float32
+            self.router_params_dtype = torch.float32

        self.router = LongcatRouter(
            config=config,
            zero_expert_num=config.zero_expert_num,
-            rounter_params_dtype=self.rounter_params_dtype,
+            router_params_dtype=self.router_params_dtype,
            prefix=f"{prefix}.gate",
        )

@@ -309,7 +309,7 @@ class LongcatMoe(nn.Module):
            prefix=f"{prefix}.experts",
            enable_eplb=enable_eplb,
            routed_scaling_factor=config.routed_scaling_factor,
-            router_logits_dtype=self.rounter_params_dtype,
+            router_logits_dtype=self.router_params_dtype,
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -329,7 +329,7 @@ class LongcatMoe(nn.Module):
            hidden_states_padded = hidden_states

        router_logits_full = self.router(
-            hidden_states_padded.to(self.rounter_params_dtype)
+            hidden_states_padded.to(self.router_params_dtype)
        )

        # ZeroExpertFusedMoE handles routing memoization and zero expert computation

--- a/vllm/model_executor/models/molmo2.py
+++ b/vllm/model_executor/models/molmo2.py
@@ -1321,14 +1321,14 @@ def get_image_size(image: ImageInput) -> ImageSize:
        raise ValueError(f"Unknown image type: {type(image)}")


-def exif_tranpose(
+def exif_transpose(
    images: ImageInput | None,
 ) -> ImageInput | None:
    if images is None:
        return None
    if images is not None and isinstance(images, (list, tuple)):
        images = [
-            exif_tranpose(img) if isinstance(img, Image) else img for img in images
+            exif_transpose(img) if isinstance(img, Image) else img for img in images
        ]
    elif images is not None and isinstance(images, Image):
        images = ImageOps.exif_transpose(images)
@@ -1667,7 +1667,7 @@ class Molmo2ProcessorWrapper:
        **kwargs: object,
    ) -> BatchFeature:
        inputs = [text]
-        images = exif_tranpose(images)
+        images = exif_transpose(images)
        if getattr(self.processor, "image_processor", None) is not None:
            inputs.append(images)
        if getattr(self.processor, "video_processor", None) is not None:
@@ -2352,7 +2352,7 @@ class Molmo2MultiModalProcessor(BaseMultiModalProcessor[Molmo2ProcessingInfo]):
        def get_image_replacement_molmo2(item_idx: int) -> list[int]:
            images = mm_items.get_items("image", ImageProcessorItems)
            image = images.get(item_idx)
-            image = exif_tranpose(image)
+            image = exif_transpose(image)

            resize_nrows, resize_cols = processor.get_base_grid_size(is_video=False)
            if use_single_crop_col_tokens is not None:

--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -349,7 +349,7 @@ class NemotronHMoEDecoderLayer(nn.Module):
        super().__init__()
        self.config = config

-        # Get per-layer config for heterogeneous models if exsist
+        # Get per-layer config for heterogeneous models if exists
        get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
        layer_config = get_layer_config(layer_idx) if get_layer_config else config

@@ -517,7 +517,7 @@ class NemotronHAttentionDecoderLayer(nn.Module):
    ) -> None:
        super().__init__()

-        # Get per-layer config for heterogeneous models if exsist
+        # Get per-layer config for heterogeneous models if exists
        get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
        layer_config = get_layer_config(layer_idx) if get_layer_config else config


--- a/vllm/model_executor/models/paddleocr_vl.py
+++ b/vllm/model_executor/models/paddleocr_vl.py
@@ -486,7 +486,7 @@ class SiglipVisionEmbeddings(nn.Module):
                )
            (
                batch_size,
-                squence_len,
+                sequence_len,
                channel,
                height,
                width,

--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -689,19 +689,19 @@ class ConformerEncoder(TransformerEncoderBase):
            default False.
        ext_pw_out_channel: int, optional
            the number of channel for CNN
-            before depthwise_seperable_CNN.
+            before depthwise_separable_CNN.
            If 0 then use linear. default 0.
        ext_pw_kernel_size: int, optional
-            kernel size of N before depthwise_seperable_CNN.
+            kernel size of N before depthwise_separable_CNN.
            only work for ext_pw_out_channel > 0.
            default 1
        depthwise_seperable_out_channel: int, optional
            the number of channel for
-            depthwise_seperable_CNN.
+            depthwise_separable_CNN.
            default 256.
        depthwise_multiplier: int, optional
            the number of multiplier for
-            depthwise_seperable_CNN.
+            depthwise_separable_CNN.
            default 1.
        chunk_se: int, optional
            0 for offline SE.
@@ -711,7 +711,7 @@ class ConformerEncoder(TransformerEncoderBase):
             by only the current chunk.
            default 0.
        kernel_size: int, optional
-            the number of kernels for depthwise_seperable_CNN.
+            the number of kernels for depthwise_separable_CNN.
            default 3.
        activation: str, optional
            FeedForward block activation.
@@ -721,7 +721,7 @@ class ConformerEncoder(TransformerEncoderBase):
            activation function used in ConvModule part
            of the conformer, default "relu".
        conv_glu_type: str, optional
-            activation used use glu in depthwise_seperable_CNN,
+            activation used in glu in depthwise_separable_CNN,
            default "sigmoid"
        bias_in_glu: bool, optional
            if set to True, use additive bias in the weight module

--- a/vllm/model_executor/models/phi4mm_utils.py
+++ b/vllm/model_executor/models/phi4mm_utils.py
@@ -217,8 +217,8 @@ class GLUPointWiseConv(nn.Module):
        return x


-class DepthWiseSeperableConv1d(nn.Module):
-    """DepthWiseSeperableConv1d module used in Convnet module
+class DepthWiseSeparableConv1d(nn.Module):
+    """DepthWiseSeparableConv1d module used in ConvNet module
    for the conformer, for more details see:
    https://arxiv.org/pdf/2005.08100v1.pdf

@@ -390,7 +390,7 @@ class ConvModule(nn.Module):
        else:
            padding = (kernel_size - 1) // 2

-        self.dw_sep_conv_1d = DepthWiseSeperableConv1d(
+        self.dw_sep_conv_1d = DepthWiseSeparableConv1d(
            input_dim,
            depthwise_seperable_out_channel,
            kernel_size,