[Doc]: fix typos in Python comments (#24026)

Signed-off-by: Didier Durand <durand.didier@gmail.com>

[Doc]: fix typos in Python comments (#24026)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
10728495 · Didier Durand · GitHub · dc1a5318 · 10728495 · 10728495
Unverified commit 10728495, authored Sep 01, 2025 by Didier Durand; committed by GitHub on Sep 01, 2025
14 changed files
--- a/examples/offline_inference/multilora_inference.py
+++ b/examples/offline_inference/multilora_inference.py
@@ -23,7 +23,7 @@ def create_test_prompts(
    2 requests for base model, 4 requests for the LoRA. We define 2
    different LoRA adapters (using the same model for demo purposes).
    Since we also set `max_loras=1`, the expectation is that the requests
-    with the second LoRA adapter will be ran after all requests with the
+    with the second LoRA adapter will be run after all requests with the
    first adapter have finished.
    """
    return [

--- a/vllm/distributed/device_communicators/pynccl.py
+++ b/vllm/distributed/device_communicators/pynccl.py
@@ -31,7 +31,7 @@ class PyNcclCommunicator:
            group: the process group to work on. If None, it will use the
                default process group.
            device: the device to bind the PyNcclCommunicator to. If None,
-                it will be bind to f"cuda:{local_rank}".
+                it will be bound to f"cuda:{local_rank}".
            library_path: the path to the NCCL library. If None, it will
                use the default library path.
        It is the caller's responsibility to make sure each communicator

--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -939,8 +939,8 @@ def get_pipeline_model_parallel_group():
 def graph_capture(device: torch.device):
    """
    `graph_capture` is a context manager which should surround the code that
-    is capturing the CUDA graph. Its main purpose is to ensure that the
+    is capturing the CUDA graph. Its main purpose is to ensure that some
-    some operations will be run after the graph is captured, before the graph
+    operations will be run after the graph is captured, before the graph
    is replayed. It returns a `GraphCaptureContext` object which contains the
    necessary data for the graph capture. Currently, it only contains the
    stream that the graph capture is running on. This stream is set to the

--- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@@ -165,7 +165,7 @@ class PythonicToolParser(ToolParser):
                            index] += delta.function.arguments
            # HACK: serving_chat.py inspects the internal state of tool parsers
-            # when determining it's final streaming delta, automatically
+            # when determining its final streaming delta, automatically
            # adding autocompleted JSON.
            # These two lines avoid that nonsense while ensuring finish_reason
            # is set to tool_calls when at least one tool is called.

--- a/vllm/model_executor/layers/fused_moe/moe_pallas.py
+++ b/vllm/model_executor/layers/fused_moe/moe_pallas.py
@@ -7,7 +7,7 @@ import torch.nn.functional as F
 def _histogram(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
    """
-  Compute the histogram of a int32 tensor. The bin edges are defined by the
+  Compute the histogram of an int32 tensor. The bin edges are defined by the
  min and max values, with step = 1.
  """
    assert input.dtype == torch.int32, "input must be of torch.int32 dtype."

--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -544,7 +544,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
                                                      vision_embeddings)
            input_ids = None
-        # up until here we have a inputs_embeds 100% numerical identity
+        # up until here we have an inputs_embeds 100% numerical identity
        # between the OG HF Transformers implementation and ours
        hidden_states = self.llm(
            input_ids=input_ids,

--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -43,7 +43,7 @@ class ConformerEncoderLayer(nn.Module):
            if set different to 0, the number of 
             depthwise_seperable_out_channel will be used as a
             channel_out of the second conv1d layer. 
-             otherwise, it equal to 0, the second conv1d layer is skipped.
+             otherwise, if it equals 0, the second conv1d layer is skipped.
        depthwise_multiplier: int
            number of input_dim channels duplication. this value
             will be used to compute the hidden channels of the Conv1D.
@@ -115,7 +115,7 @@ class ConformerEncoderLayer(nn.Module):
                    we recalculate activation in backward.
            default "".
        export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
              default False.
        use_pt_scaled_dot_product_attention: bool, optional
@@ -686,7 +686,7 @@ class ConformerEncoder(TransformerEncoderBase):
            only work for glu_in_attention !=0
            default "swish".
        export: bool, optional
-            if set to True, it remove the padding from convolutional layers
+            if set to True, it removes the padding from convolutional layers
             and allow the onnx conversion for inference.
              default False.
        activation_checkpointing: str, optional

--- a/vllm/model_executor/models/phi4mm_utils.py
+++ b/vllm/model_executor/models/phi4mm_utils.py
@@ -258,7 +258,7 @@ class DepthWiseSeperableConv1d(nn.Module):
            if set different to 0, the number of 
             depthwise_seperable_out_channel will be used as a channel_out
             of the second conv1d layer.
-             otherwise, it equal to 0, the second conv1d layer is skipped.
+             otherwise, if it equals 0, the second conv1d layer is skipped.
        kernel_size: int
            kernel_size
        depthwise_multiplier: int

--- a/vllm/third_party/pynvml.py
+++ b/vllm/third_party/pynvml.py
@@ -1022,7 +1022,7 @@ def _extractNVMLErrorsAsClasses():
    Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate
    exceptions more easily.
-    NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass.
+    NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass.
    e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
    '''
    this_module = sys.modules[__name__]

--- a/vllm/transformers_utils/configs/nemotron.py
+++ b/vllm/transformers_utils/configs/nemotron.py
@@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
 class NemotronConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a
-    [`NemotronModel`]. It is used to instantiate an Nemotron model
+    [`NemotronModel`]. It is used to instantiate a Nemotron model
    according to the specified arguments, defining the model architecture.
    Instantiating a configuration with the defaults will yield a similar
    configuration to that of the Nemotron-8B.

--- a/vllm/transformers_utils/configs/nemotron_h.py
+++ b/vllm/transformers_utils/configs/nemotron_h.py
@@ -38,7 +38,7 @@ class NemotronHConfig(PretrainedConfig):
            passed when calling [`NemotronHModel`]
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether the model's input and output word embeddings should be
-            tied. Note that this is only relevant if the model has a output
+            tied. Note that this is only relevant if the model has an output
            word embedding layer.
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.

--- a/vllm/transformers_utils/processors/ovis.py
+++ b/vllm/transformers_utils/processors/ovis.py
@@ -55,7 +55,7 @@ class OvisProcessorKwargs(ProcessingKwargs, total=False):   # type: ignore[call-
 class OvisProcessor(ProcessorMixin):
    r"""
-    Constructs a Ovis processor which wraps a Ovis image processor and a Qwen2 tokenizer into a single processor.
+    Constructs an Ovis processor which wraps an Ovis image processor and a Qwen2 tokenizer into a single processor.
    [`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
    [`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`] for more information.
    Args:

--- a/vllm/transformers_utils/processors/ovis2_5.py
+++ b/vllm/transformers_utils/processors/ovis2_5.py
@@ -41,7 +41,7 @@ class Ovis2_5ProcessorKwargs(ProcessingKwargs,
 class Ovis2_5Processor(ProcessorMixin):
    r"""
-    Constructs a Ovis processor which wraps a Ovis image processor
+    Constructs an Ovis processor which wraps an Ovis image processor
    and a Qwen2 tokenizer into a single processor.
    [`OvisProcessor`] offers all the functionalities of 
    [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. 

--- a/vllm/v1/spec_decode/ngram_proposer.py
+++ b/vllm/v1/spec_decode/ngram_proposer.py
@@ -107,7 +107,7 @@ def _find_longest_matched_ngram_and_propose_tokens(
    longest_ngram = 0
    position = 0
-    # lps[0] always equal to 0, we starts with index 1
+    # lps[0] is always equal to 0, so we start with index 1
    prev_lps = 0
    i = 1
    while i < total_token: