[Docs] Fix warnings in `mkdocs build` (continued) (#23743)

Signed-off-by: Zerohertz <ohg3417@gmail.com>
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com>

[Docs] Fix warnings in `mkdocs build` (continued) (#23743)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com>
4e4d017b · Hyogeun Oh (오효근) · GitHub · dd589322 · 4e4d017b · 4e4d017b
Unverified Commit 4e4d017b authored Aug 28, 2025 by Hyogeun Oh (오효근) Committed by GitHub Aug 27, 2025
6 changed files
--- a/vllm/v1/structured_output/backend_types.py
+++ b/vllm/v1/structured_output/backend_types.py
@@ -110,7 +110,7 @@ class StructuredOutputBackend(ABC):

        Args:
            request_type (StructuredOutputOptions): The type of structured
-              output request.
+                output request.
            grammar_spec (str): The grammar specification to compile.

        Returns:
@@ -124,7 +124,7 @@ class StructuredOutputBackend(ABC):

        Args:
            max_num_seqs (int): The maximum number of sequences for which
-              to allocate the bitmask.
+                to allocate the bitmask.
        """

    @abstractmethod

--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -525,9 +525,6 @@ class InputBatch:
        Any consecutive empty indices at the very end of the list are not
        filled.

-        Args:
-          empty_req_indices: empty indices which may be filled.
-
        Returns:
          swaps: list of (from,to) swap tuples for moved requests
          empty_req_indices: indices not filled by condensation

--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2955,7 +2955,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        Args:
            kv_cache_config: The KV cache config
            kv_cache_raw_tensors: The KV cache buffer of each layer, with
-            correct size but uninitialized shape.
+                correct size but uninitialized shape.
        Returns:
            Dict[str, torch.Tensor]: A map between layer names to their
            corresponding memory buffer for KV cache.

--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -552,7 +552,7 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        return kv_cache_spec

    def _get_slot_mapping_metadata(self, num_reqs,
-                                   num_scheduled_tokens_per_req):
+                                   num_scheduled_tokens_per_req) -> np.ndarray:
        """
        Computes metadata for mapping slots to blocks in the key-value (KV)
        cache for a batch of requests.
@@ -565,15 +565,15 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        Args:
            num_reqs (int): Number of requests in the current batch.
            num_scheduled_tokens_per_req (int or np.ndarray): Number of tokens
-            to be scheduled for each request.
+                to be scheduled for each request.

        Returns:
            np.ndarray: A 2D array of shape (total_block_len, 3), where each row
-            contains:
+                contains:
                - kv_cache_start_index (int): The starting index in the KV cache
-                    for the corresponding slice.
+                  for the corresponding slice.
                - new_kv_start_index (int): The starting index in the new KV
-                    cache for the corresponding slice.
+                  cache for the corresponding slice.
                - slice_len (int): The length of the slice.
        """
        slices_start = self.input_batch.num_computed_tokens_cpu[:num_reqs]

--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -172,10 +172,10 @@ def scatter_mm_placeholders(

    Args:
        embeds: The multimodal embeddings.
-          Shape: `(num_embeds, embed_dim)`
+            Shape: `(num_embeds, embed_dim)`
        is_embed: A boolean mask indicating which positions in the placeholder
-          tokens need to be filled with multimodal embeddings.
-          Shape: `(num_placeholders, num_embeds)`
+            tokens need to be filled with multimodal embeddings.
+            Shape: `(num_placeholders, num_embeds)`
    """
    if is_embed is None:
        return embeds
@@ -278,7 +278,7 @@ def bind_kv_cache(
    Args:
        kv_caches: The allocated kv_caches with layer names as keys.
        forward_context: The global forward context containing all Attention
-        layers with layer names as keys.
+            layers with layer names as keys.
        runner_kv_caches: The kv_cache declared by ModelRunner.
    """
    # Bind kv_caches to ModelRunner

--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@@ -36,8 +36,8 @@ class WorkerBase(WorkerBaseV0):
            local_rank: Local device index
            rank: Global rank in distributed setup
            distributed_init_method: Distributed initialization method
-            is_driver_worker: Whether this worker handles driver 
-            responsibilities
+            is_driver_worker: Whether this worker handles driver
+                responsibilities
        """
        # Configuration storage
        super().__init__(vllm_config=vllm_config)