[Doc]: fix typos in Python comments (#24093)

Signed-off-by: Didier Durand <durand.didier@gmail.com>

[Doc]: fix typos in Python comments (#24093)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
d7e1e599 · Didier Durand · GitHub · c4ed78b1 · d7e1e599 · d7e1e599
Unverified commit d7e1e599, authored Sep 03, 2025 by Didier Durand; committed by GitHub Sep 02, 2025
15 changed files
--- a/tests/core/test_scheduler.py
+++ b/tests/core/test_scheduler.py
@@ -641,7 +641,7 @@ def test_schedule_decode_blocks_to_copy_update():
    # Nothing is preempted.
    assert output.blocks_to_swap_out == []
    # Since append_slot returns the source -> dist mapping, it should
-    # applied.
+    # be applied.
    assert output.blocks_to_copy == [(2, 3)]

--- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
+++ b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
@@ -32,7 +32,7 @@ def to_bytes(y, sr):
 async def transcribe_audio(client, tokenizer, y, sr):
    # Send loaded audio directly instead of loading from disk,
-    # dont account for that time though
+    # don't account for that time though
    with to_bytes(y, sr) as f:
        start_time = time.perf_counter()
        transcription = await client.audio.transcriptions.create(

--- a/tests/entrypoints/openai/test_return_token_ids.py
+++ b/tests/entrypoints/openai/test_return_token_ids.py
@@ -224,7 +224,7 @@ async def test_comparison_with_prompt_logprobs_and_logprobs(server):
                logprobs_token_ids.append(token_id)
        # When echo=True, the logprobs include both prompt and response tokens
-        # The token_ids field should match the the suffix of response portion
+        # The token_ids field should match the suffix of response portion
        # The prompt_token_ids should match the prompt portion
        assert len(completion.choices[0].token_ids) < len(logprobs_token_ids)
        response_token_ids_length = len(completion.choices[0].token_ids)

--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -313,7 +313,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
        }],
    )
-    # By default cache_salt in the engine prompt is not set
+    # By default, cache_salt in the engine prompt is not set
    with suppress(Exception):
        await serving_chat.create_chat_completion(req)
    assert "cache_salt" not in mock_engine.generate.call_args.args[0]

--- a/tests/kernels/utils.py
+++ b/tests/kernels/utils.py
@@ -1236,7 +1236,7 @@ def baseline_scaled_mm(a: torch.Tensor,
                       bias: Optional[torch.Tensor] = None) -> torch.Tensor:
    # We treat N-dimensional group scaling as extended numpy-style broadcasting
-    # in numpy simply stretches dimensions with an extent of 1 to match the
+    # in numpy simply stretches dimensions with an extent of 1 to match
    # the target shape by repeating the data along that dimension (broadcasting)
    # , we extend these semantics to say if the extent of a dimension in the
    # source shape is not 1 and does not match the target shape we repeat each

--- a/tests/multimodal/test_utils.py
+++ b/tests/multimodal/test_utils.py
@@ -458,7 +458,7 @@ def run_dp_sharded_vision_model_vs_direct(local_rank: int, world_size: int,
    with torch.inference_mode():
        sharded_output = run_dp_sharded_vision_model(image_input, vision_model)
-    # Check that the world size is setup correctly
+    # Check that the world size is set up correctly
    assert get_tensor_model_parallel_world_size() == world_size
    # Check that the outputs have the same shape
@@ -642,7 +642,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct(local_rank: int,
                                                           rope_type="rope_3d")
        sharded_output = torch.cat(sharded_output, dim=0)
-    # Check that the world size is setup correctly
+    # Check that the world size is set up correctly
    assert get_tensor_model_parallel_world_size() == world_size
    # Compare outputs (only on rank 0)

--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -83,7 +83,7 @@ def test_ngram_correctness(
    model_name: str,
 ):
    '''
-    Compare the outputs of a original LLM and a speculative LLM
+    Compare the outputs of an original LLM and a speculative LLM
    should be the same when using ngram speculative decoding.
    '''
    with monkeypatch.context() as m:

--- a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
@@ -42,7 +42,7 @@ def test_basic_lifecycle():
    engine_core_outputs = scheduler.update_from_output(scheduler_output,
                                                       model_runner_output)
-    # Ensure the request is finished after 1 tokens.
+    # Ensure the request is finished after 1 token.
    assert request.is_finished()
    assert request.status == RequestStatus.FINISHED_LENGTH_CAPPED
    output = engine_core_outputs[0].outputs[0]
@@ -141,7 +141,7 @@ def test_short_prompt_lifecycle():
 def test_prefix_cache_lifecycle():
-    """Test that remote decode params still works with a prefix cache hit."""
+    """Test that remote decode params still work with a prefix cache hit."""
    vllm_config = create_vllm_config()
    scheduler = create_scheduler(vllm_config)

--- a/tests/v1/spec_decode/test_tree_attention.py
+++ b/tests/v1/spec_decode/test_tree_attention.py
@@ -187,7 +187,7 @@ def test_tree_attn_correctness() -> None:
                        dtype=torch.bfloat16,
                    )
-                    # Setup the block table and KV cache for paged KV.
+                    # Set up the block table and KV cache for paged KV.
                    assert max_sequence_length % block_size == 0
                    max_blocks_per_batch = max_sequence_length // block_size
                    kv_cache = torch.randn(
@@ -222,7 +222,7 @@ def test_tree_attn_correctness() -> None:
                                num_alloc_blocks_per_batch] = block_ids.view(
                                    -1, num_alloc_blocks_per_batch)
-                    # Setup the slot mapping for the input KVs.
+                    # Set up the slot mapping for the input KVs.
                    tree_positions = sequence_position + torch.arange(
                        0,
                        tree_size_q,

--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -239,7 +239,7 @@ def get_adapter_absolute_path(lora_path: str) -> str:
    except (HfHubHTTPError, RepositoryNotFoundError, EntryNotFoundError,
            HFValidationError):
        # Handle errors that may occur during the download
-        # Return original path instead instead of throwing error here
+        # Return original path instead of throwing error here
        logger.exception("Error downloading the HuggingFace model")
        return lora_path

--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -94,7 +94,7 @@ def find_matched_target(
    config that a layer corresponds to.
    Recall that a compressed-tensors configs has a concept of
-    config_groups, where each layer can be quantized with with a different
+    config_groups, where each layer can be quantized with a different
    scheme.
    targets in each config_group will be a list of either layer names

--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -213,7 +213,7 @@ class MediaConnector:
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
-        Load a PIL image from a HTTP or base64 data URL.
+        Load a PIL image from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
@@ -237,7 +237,7 @@ class MediaConnector:
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
-        Asynchronously load a PIL image from a HTTP or base64 data URL.
+        Asynchronously load a PIL image from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
@@ -261,7 +261,7 @@ class MediaConnector:
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
-        Load video from a HTTP or base64 data URL.
+        Load video from an HTTP or base64 data URL.
        """
        image_io = ImageMediaIO(image_mode=image_mode,
                                **self.media_io_kwargs.get("image", {}))
@@ -281,7 +281,7 @@ class MediaConnector:
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
-        Asynchronously load video from a HTTP or base64 data URL.
+        Asynchronously load video from an HTTP or base64 data URL.
        By default, the image is converted into RGB format.
        """
@@ -370,7 +370,7 @@ def group_mm_inputs_by_modality(
    def modality_group_func(
            mm_input: MultiModalKwargsItems) -> Union[str, int]:
-        # If the input has multiple modalities, return a id as the unique key
+        # If the input has multiple modalities, return an id as the unique key
        # for the mm_input input.
        if len(mm_input) > 1:
            return id(mm_input)

--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -709,7 +709,7 @@ def reorder_batch_to_split_decodes_and_prefills(
    for i, req_id in enumerate(input_batch.req_ids):
        num_tokens = scheduler_output.num_scheduled_tokens[req_id]
-        # for now treat 1 scheduled token as "decode" even if its not,
+        # for now treat 1 scheduled token as "decode" even if it's not,
        # we should update this to something like < 8 in the future but
        # currently the TritonMLA._forward_decode only supports
        # num_tokens = 1

--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -65,9 +65,9 @@ def get_outlines_cache_path() -> str:
    elif xdg_cache_home:
        return os.path.join(xdg_cache_home, ".cache", "outlines")
    # If homedir is "/", we may be inside a container, and thus writing to
-    # root would be problematic, so we fallback to using a tempfile.
+    # root would be problematic, so we fall back to using a tempfile.
    # Also validate the path exists, since os.path.expanduser does
-    # not garuntee existence.
+    # not guarantee existence.
    elif os.path.isdir(home_dir) and home_dir != "/":
        # Default Unix fallback: ~/.cache/outlines
        return os.path.join(home_dir, ".cache", "outlines")

--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -250,7 +250,7 @@ class TPUWorker:
        scheduler_output: "SchedulerOutput",
    ) -> Optional[ModelRunnerOutput]:
        output = self.model_runner.execute_model(scheduler_output)
-        # every worker's output is needed when kv_transfer_group is setup
+        # every worker's output is needed when kv_transfer_group is set up
        return output if self.is_driver_worker or has_kv_transfer_group(
        ) else None