[Docs] Fix warnings in mkdocs build (continued) (#25042)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>

[Docs] Fix warnings in mkdocs build (continued) (#25042)
Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
032d661d · Wenlong Wang · GitHub · e08a3a3f · 032d661d · 032d661d
Unverified commit 032d661d, authored Sep 20, 2025 by Wenlong Wang; committed by GitHub on Sep 20, 2025.
7 changed files
--- a/vllm/multimodal/__init__.py
+++ b/vllm/multimodal/__init__.py
@@ -15,7 +15,7 @@ is used by model runners to dispatch data processing according to the target
 model.
 Info:
-    [mm_processing](../../../design/mm_processing.html)
+    [mm_processing](../../../design/mm_processing.md)
 """
 __all__ = [

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -3273,7 +3273,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
                      and getattr(cfg.attn_config, "alibi", False)))))
-def sha256(input) -> bytes:
+def sha256(input: Any) -> bytes:
    """Hash any picklable Python object using SHA-256.
    The input is serialized using pickle before hashing, which allows
@@ -3290,7 +3290,7 @@ def sha256(input) -> bytes:
    return hashlib.sha256(input_bytes).digest()
-def sha256_cbor(input) -> bytes:
+def sha256_cbor(input: Any) -> bytes:
    """
    Hash objects using CBOR serialization and SHA-256.

--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
--- a/vllm/v1/sample/rejection_sampler.py
+++ b/vllm/v1/sample/rejection_sampler.py
@@ -351,17 +351,17 @@ def generate_uniform_probs(
    without a seed.
    Args:
-        num_tokens : int
+        num_tokens: int
            Total number of tokens.
-        num_draft_tokens : List[List[int]]
+        num_draft_tokens: List[List[int]]
            Number of draft tokens per request.
-        generators : Optional[Dict[int, torch.Generator]]
+        generators: Optional[Dict[int, torch.Generator]]
            A dictionary mapping indices in the batch to
            `torch.Generator` objects.
-        device : torch.device
+        device: torch.device
            The device on which to allocate the tensor.
    Returns:
-        uniform_rand : torch.Tensor
+        uniform_rand: torch.Tensor
            A tensor of shape `(num_tokens, )` containing uniform
            random values in the range [0, 1).
    """

--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -205,7 +205,8 @@ def gather_mm_placeholders(
    """
    Reconstructs the embeddings from the placeholder tokens.
-    This is the operation of [scatter_mm_placeholders][].
+    This is the operation of [`scatter_mm_placeholders`]
+    [vllm.v1.worker.utils.scatter_mm_placeholders].
    """
    if is_embed is None:
        return placeholders

--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -1810,7 +1810,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
        return [output]
-    def need_recv_kv(self, model_input, kv_caches) -> bool:
+    def need_recv_kv(self, model_input: ModelInputForGPUWithSamplingMetadata,
+                     kv_caches: List[torch.Tensor]) -> bool:
        """Check if we need to receive kv-cache from the other worker.
        We need to receive KV when
            1. current vLLM instance is KV cache consumer/decode vLLM instance
@@ -1825,6 +1826,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
        if self.vllm_config.kv_transfer_config is None:
            return False
+        if model_input.attn_metadata is None:
+            raise ValueError("model_input.attn_metadata cannot be None")
        prefill_meta = model_input.attn_metadata.prefill_metadata
        # check if the current run is profiling
@@ -1835,7 +1839,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
        return self.vllm_config.kv_transfer_config.is_kv_consumer and (
            not is_profile_run) and is_prefill_run
-    def need_send_kv(self, model_input, kv_caches) -> bool:
+    def need_send_kv(self, model_input: ModelInputForGPUWithSamplingMetadata,
+                     kv_caches: List[torch.Tensor]) -> bool:
        """Check if we need to send kv-cache to the other worker.
        We need to send KV when
            1. current vLLM instance is KV cache producer/prefill vLLM instance
@@ -1850,6 +1855,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
        if self.vllm_config.kv_transfer_config is None:
            return False
+        if model_input.attn_metadata is None:
+            raise ValueError("model_input.attn_metadata cannot be None")
        prefill_meta = model_input.attn_metadata.prefill_metadata
        # check if the current run is profiling