[Doc]: fix typos in Python comments (#24115)

Signed-off-by: Didier Durand <durand.didier@gmail.com>

[Doc]: fix typos in Python comments (#24115)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
02d411fd · Didier Durand · GitHub · d7e1e599 · 02d411fd · 02d411fd
Unverified commit 02d411fd, authored Sep 03, 2025 by Didier Durand; committed by GitHub Sep 02, 2025
11 changed files
--- a/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
+++ b/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
@@ -218,7 +218,7 @@ if __name__ == "__main__":
        "--xaxis",
        type=str,
        default="# of max concurrency.",
-        help="column name to use as X Axis in comparision graph",
+        help="column name to use as X Axis in comparison graph",
    )
    args = parser.parse_args()


--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1104,7 +1104,7 @@ def create_argument_parser():
        "--percentile-metrics",
        type=str,
        default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
        "This argument specifies the metrics to report percentiles. "
        'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
        'Default value is "ttft,tpot,itl".',

--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -998,7 +998,7 @@ def create_argument_parser():
        "--percentile-metrics",
        type=str,
        default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
        "This argument specifies the metrics to report percentiles. "
        'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
        'Default value is "ttft,tpot,itl".',

--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -719,7 +719,7 @@ def create_argument_parser():
        "[length * (1 - range_ratio), length * (1 + range_ratio)].",
    )

-    # hf dtaset
+    # hf dataset
    parser.add_argument(
        "--hf-subset", type=str, default=None, help="Subset of the HF dataset."
    )

--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -119,7 +119,7 @@ def attempt_to_make_names_unique(entries_and_traces):
             if not all_the_same(trace_eles)), None)

        if first_trace_difference is None:
-            # can't create a unique name, leave them names as the
+            # can't create a unique name, leave the names as they
            # are they will get aggregated by the pivot_table call
            continue


--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -513,7 +513,7 @@ if flashinfer_comm is not None:
                        torch.ops._C.static_scaled_fp8_quant(
                            quant_out, norm_out, scale_factor)
            if scale_factor is None or norm_out is not None:
-                # we need to return allreduce outpput
+                # we need to return allreduce output
                # in cases of non quant fused AR + RMS norm
                # and fused AR + RMS norm + quant without fused add
                allreduce_in.copy_(allreduce_out)

--- a/vllm/engine/multiprocessing/engine.py
+++ b/vllm/engine/multiprocessing/engine.py
@@ -49,7 +49,7 @@ class MQLLMEngine:

    This class is used to wrap the
    [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
-    in concurrnet manner. It runs a background loop and uses zeromq to
+    in a concurrent manner. It runs a background loop and uses zeromq to
    receive new requests and stream outputs incrementally via ipc.

    The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode

--- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -23,7 +23,7 @@ TORCH_DEVICE_IDENTITY = None
 # The condition to determine if it is on a platform that supports
 # torch._scaled_mm rowwise feature.
 # The condition is determined once as the operations
-# are time consuming.
+# are time-consuming.
 USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
    torch.__version__) >= version.parse("2.7")
                               and current_platform.has_device_capability(94))

--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -211,7 +211,7 @@ class DefaultModelLoader(BaseModelLoader):

            if not USE_TPU_COMMONS:
                # In PyTorch XLA, we should call `xm.mark_step`
-                # requently so that not too many ops are accumulated
+                # frequently so that not too many ops are accumulated
                # in the XLA program. import torch_xla.core.xla_model
                # as xm
                import torch_xla.core.xla_model as xm

--- a/vllm/v1/worker/xpu_worker.py
+++ b/vllm/v1/worker/xpu_worker.py
@@ -84,7 +84,7 @@ class XPUWorker(Worker):
        """Profiles the peak memory usage of the model to determine how many
        KV blocks may be allocated without OOMs.
        The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
        that can be allocated with the remaining free memory.
        .. tip::
            You may limit the usage of GPU memory

--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -234,7 +234,7 @@ class Worker(LocalOrDistributedWorkerBase):
        KV blocks may be allocated without OOMs.

        The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
        that can be allocated with the remaining free memory.

        Tip: