[Doc]: fix typos in various files (#24798)

Signed-off-by: Didier Durand <durand.didier@gmail.com>

[Doc]: fix typos in various files (#24798)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
41ae4a1e · Didier Durand · GitHub · 4dad72f0 · 41ae4a1e · 41ae4a1e
Unverified Commit 41ae4a1e authored Sep 13, 2025 by Didier Durand Committed by GitHub Sep 13, 2025
10 changed files
--- a/examples/offline_inference/tpu.py
+++ b/examples/offline_inference/tpu.py
@@ -42,7 +42,7 @@ def main():
        llm_args["model"] = "meta-llama/Llama-3.1-8B-Instruct"

    # Set `enforce_eager=True` to avoid ahead-of-time compilation.
-    # In real workloads, `enforace_eager` should be `False`.
+    # In real workloads, `enforce_eager` should be `False`.
    llm = LLM(**llm_args)
    outputs = llm.generate(prompts, sampling_params)
    print("-" * 50)

--- a/vllm/core/block/naive_block.py
+++ b/vllm/core/block/naive_block.py
@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
            # Increment refcount for each block.
            assert block.block_id is not None
            refcount = self._refcounter.incr(block.block_id)
-            assert refcount != 1, "can't fork free'd block"
+            assert refcount != 1, "can't fork freed block"

            forked_block = self._block_pool.init_block(
                prev_block=prev_block,

--- a/vllm/core/evictor.py
+++ b/vllm/core/evictor.py
@@ -58,7 +58,7 @@ class Evictor(ABC):

 class BlockMetaData:
    """Data structure for storing key data describe cached block, so that
-    evitor could use to make its decision which one to choose for eviction
+    evictor could use it to decide which block to choose for eviction

    Here we use physical block id as the dict key, as there maybe several
    blocks with the same content hash, but their physical id is unique.

--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
        if local_interval_elapsed(stats.now, self.last_local_log,
                                  self.local_interval):
            # Compute summary metrics for tracked stats (and log them
-            # to promethus if applicable).
+            # to prometheus if applicable).
            prompt_throughput = get_throughput(self.num_prompt_tokens,
                                               now=stats.now,
                                               last_log=self.last_local_log)
@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):


 class PrometheusStatLogger(StatLoggerBase):
-    """PrometheusStatLogger is used LLMEngine to log to Promethus."""
+    """PrometheusStatLogger is used by LLMEngine to log to Prometheus."""
    _metrics_cls = Metrics
    _gauge_cls = prometheus_client.Gauge


--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
        """
        Handle special case for models where MLP layers are already
        fused on disk. In this case, we have no shard id. This function
-        determmines the shard id by splitting these layers and then calls
+        determines the shard id by splitting these layers and then calls
        the weight loader using the shard id.

        An example of a model with these fused layers:
@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
        """
        Handle special case for models where QKV layers are already 
        fused on disk. In this case, we have no shard id. This function
-        determmines the shard id by splitting these layers and then calls
+        determines the shard id by splitting these layers and then calls
        the weight loader using the shard id.

        An example of a model with these fused layers:

--- a/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):

        if params_dtype is None:
            params_dtype = torch.get_default_dtype()
-        # Divide the weight matrix along the vocaburaly dimension.
+        # Divide the weight matrix along the vocabulary dimension.
        self.num_added_embeddings = self.num_embeddings - self.org_vocab_size
        self.num_embeddings_per_partition = divide(self.num_embeddings_padded,
                                                   self.tp_size)

--- a/vllm/model_executor/models/ernie45_vl.py
+++ b/vllm/model_executor/models/ernie45_vl.py
@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
            return None

        # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
        multimodal_embeddings: tuple[torch.Tensor, ...] = ()

        # NOTE: It is important to iterate over the keys in this dictionary

--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,

        # ruff: noqa
        # The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
-        # text to account for this. However, the audio preprocessing and encoder do not gurarantee they will
+        # text to account for this. However, the audio preprocessing and encoder do not guarantee they will
        # produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
        # depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
-        # the audio feature out to 188 soft tokens with the emebedding of the last token in the embed_audio vocab.
+        # the audio feature out to 188 soft tokens with the embedding of the last token in the embed_audio vocab.
        # TODO precompute and cache padding
        audio_padding_toks = torch.tensor([[self.vocab_size - 1]],
                                          dtype=torch.long,

--- a/vllm/model_executor/models/nemotron_vl.py
+++ b/vllm/model_executor/models/nemotron_vl.py
@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
            return []

        # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image).
+        # tensor corresponding to a multimodal data item (image).
        multimodal_embeddings: tuple[torch.Tensor, ...] = ()

        # NOTE: It is important to iterate over the keys in this dictionary

--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
            return None

        # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
        multimodal_embeddings: tuple[torch.Tensor, ...] = ()

        # NOTE: It is important to iterate over the keys in this dictionary