[Doc] Fix failing doc build (#28772)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[Doc] Fix failing doc build (#28772)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
89d36792 · Cyrus Leung · GitHub · cb15ee28 · 89d36792 · 89d36792
Unverified Commit 89d36792 authored Nov 15, 2025 by Cyrus Leung Committed by GitHub Nov 15, 2025
14 changed files
--- a/docs/README.md
+++ b/docs/README.md
@@ -30,8 +30,8 @@ Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at
 Where to get started with vLLM depends on the type of user. If you are looking to:
 - Run open-source models on vLLM, we recommend starting with the [Quickstart Guide](./getting_started/quickstart.md)
- Build applications with vLLM, we recommend starting with the [User Guide](./usage)
+- Build applications with vLLM, we recommend starting with the [User Guide](./usage/README.md)
- Build vLLM, we recommend starting with [Developer Guide](./contributing)
+- Build vLLM, we recommend starting with [Developer Guide](./contributing/README.md)
 For information about the development of vLLM, see:

--- a/docs/cli/bench/latency.md
+++ b/docs/cli/bench/latency.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_latency.md"
+--8<-- "docs/argparse/bench_latency.inc.md"
--- a/docs/cli/bench/serve.md
+++ b/docs/cli/bench/serve.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_serve.md"
+--8<-- "docs/argparse/bench_serve.inc.md"
--- a/docs/cli/bench/sweep/plot.md
+++ b/docs/cli/bench/sweep/plot.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_sweep_plot.md"
+--8<-- "docs/argparse/bench_sweep_plot.inc.md"
--- a/docs/cli/bench/sweep/serve.md
+++ b/docs/cli/bench/sweep/serve.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_sweep_serve.md"
+--8<-- "docs/argparse/bench_sweep_serve.inc.md"
--- a/docs/cli/bench/sweep/serve_sla.md
+++ b/docs/cli/bench/sweep/serve_sla.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_sweep_serve_sla.md"
+--8<-- "docs/argparse/bench_sweep_serve_sla.inc.md"
--- a/docs/cli/bench/throughput.md
+++ b/docs/cli/bench/throughput.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/bench_throughput.md"
+--8<-- "docs/argparse/bench_throughput.inc.md"
--- a/docs/cli/chat.md
+++ b/docs/cli/chat.md
 # vllm chat
-## Options
+## Arguments
--8<-- "docs/argparse/chat.md"
+--8<-- "docs/argparse/chat.inc.md"
--- a/docs/cli/complete.md
+++ b/docs/cli/complete.md
 # vllm complete
-## Options
+## Arguments
--8<-- "docs/argparse/complete.md"
+--8<-- "docs/argparse/complete.inc.md"
--- a/docs/cli/run-batch.md
+++ b/docs/cli/run-batch.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/run-batch.md"
+--8<-- "docs/argparse/run-batch.inc.md"
--- a/docs/cli/serve.md
+++ b/docs/cli/serve.md
@@ -4,6 +4,6 @@
 --8<-- "docs/cli/json_tip.inc.md"
-## Options
+## Arguments
--8<-- "docs/argparse/serve.md"
+--8<-- "docs/argparse/serve.inc.md"
--- a/docs/configuration/serve_args.md
+++ b/docs/configuration/serve_args.md
@@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server.
 ## CLI Arguments
 The `vllm serve` command is used to launch the OpenAI-compatible server.
-To see the available options, take a look at the [CLI Reference](../cli/README.md#options)!
+To see the available options, take a look at the [CLI Reference](../cli/README.md)!
 ## Configuration file

--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import importlib
+import importlib.metadata
+import importlib.util
 import logging
 import sys
 import traceback
-from argparse import SUPPRESS, HelpFormatter
+from argparse import SUPPRESS, Action, HelpFormatter
+from collections.abc import Iterable
+from importlib.machinery import ModuleSpec
 from pathlib import Path
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 from unittest.mock import MagicMock, patch
 from pydantic_core import core_schema
@@ -19,6 +22,11 @@ ARGPARSE_DOC_DIR = ROOT_DIR / "docs/argparse"
 sys.path.insert(0, str(ROOT_DIR))
+def mock_if_no_torch(mock_module: str, mock: MagicMock):
+    if not importlib.util.find_spec("torch"):
+        sys.modules[mock_module] = mock
 # Mock custom op code
 class MockCustomOp:
    @staticmethod
@@ -29,18 +37,21 @@ class MockCustomOp:
        return decorator
-noop = lambda *a, **k: None
+mock_if_no_torch("vllm._C", MagicMock())
-sys.modules["vllm._C"] = MagicMock()
+mock_if_no_torch("vllm.model_executor.custom_op", MagicMock(CustomOp=MockCustomOp))
-sys.modules["vllm.model_executor.custom_op"] = MagicMock(CustomOp=MockCustomOp)
+mock_if_no_torch(
-sys.modules["vllm.utils.torch_utils"] = MagicMock(direct_register_custom_op=noop)
+    "vllm.utils.torch_utils", MagicMock(direct_register_custom_op=lambda *a, **k: None)
+)
 # Mock any version checks by reading from compiled CI requirements
 with open(ROOT_DIR / "requirements/test.txt") as f:
    VERSIONS = dict(line.strip().split("==") for line in f if "==" in line)
 importlib.metadata.version = lambda name: VERSIONS.get(name) or "0.0.0"
 # Make torch.nn.Parameter safe to inherit from
-sys.modules["torch.nn"] = MagicMock(Parameter=object)
+mock_if_no_torch("torch.nn", MagicMock(Parameter=object))
 class PydanticMagicMock(MagicMock):
@@ -49,31 +60,34 @@ class PydanticMagicMock(MagicMock):
    def __init__(self, *args, **kwargs):
        name = kwargs.pop("name", None)
        super().__init__(*args, **kwargs)
-        self.__spec__ = importlib.machinery.ModuleSpec(name, None)
+        self.__spec__ = ModuleSpec(name, None)
    def __get_pydantic_core_schema__(self, source_type, handler):
        return core_schema.any_schema()
-def auto_mock(module, attr, max_mocks=100):
+def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
    """Function that automatically mocks missing modules during imports."""
-    logger.info("Importing %s from %s", attr, module)
+    logger.info("Importing %s from %s", attr, module_name)
    for _ in range(max_mocks):
        try:
+            module = importlib.import_module(module_name)
            # First treat attr as an attr, then as a submodule
-            return getattr(
+            if hasattr(module, attr):
-                importlib.import_module(module),
+                return getattr(module, attr)
-                attr,
-                importlib.import_module(f"{module}.{attr}"),
+            return importlib.import_module(f"{module_name}.{attr}")
-            )
        except ModuleNotFoundError as e:
+            assert e.name is not None
            logger.info("Mocking %s for argparse doc generation", e.name)
            sys.modules[e.name] = PydanticMagicMock(name=e.name)
-        except Exception as e:
+        except Exception:
-            logger.warning("Failed to import %s.%s: %s", module, attr, e)
+            # logger.exception appends the active traceback itself, so the message
+            # only needs placeholders for the two arguments actually passed.
+            logger.exception("Failed to import %s.%s", module_name, attr)
    raise ImportError(
-        f"Failed to import {module}.{attr} after mocking {max_mocks} imports"
+        f"Failed to import {module_name}.{attr} after mocking {max_mocks} imports"
    )
@@ -91,21 +105,26 @@ ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
 openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
 openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
-FlexibleArgumentParser = auto_mock(
-    "vllm.utils.argparse_utils", "FlexibleArgumentParser"
+if TYPE_CHECKING:
-)
+    from vllm.utils.argparse_utils import FlexibleArgumentParser
+else:
+    FlexibleArgumentParser = auto_mock(
+        "vllm.utils.argparse_utils", "FlexibleArgumentParser"
+    )
 class MarkdownFormatter(HelpFormatter):
    """Custom formatter that generates markdown for argument groups."""
-    def __init__(self, prog, starting_heading_level=3):
+    def __init__(self, prog: str, starting_heading_level: int = 3):
-        super().__init__(prog, max_help_position=float("inf"), width=float("inf"))
+        super().__init__(prog, max_help_position=sys.maxsize, width=sys.maxsize)
        self._section_heading_prefix = "#" * starting_heading_level
        self._argument_heading_prefix = "#" * (starting_heading_level + 1)
        self._markdown_output = []
-    def start_section(self, heading):
+    def start_section(self, heading: str):
        if heading not in {"positional arguments", "options"}:
            heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
            self._markdown_output.append(heading_md)
@@ -113,14 +132,14 @@ class MarkdownFormatter(HelpFormatter):
    def end_section(self):
        pass
-    def add_text(self, text):
+    def add_text(self, text: str):
        if text:
            self._markdown_output.append(f"{text.strip()}\n\n")
    def add_usage(self, usage, actions, groups, prefix=None):
        pass
-    def add_arguments(self, actions):
+    def add_arguments(self, actions: Iterable[Action]):
        for action in actions:
            if len(action.option_strings) == 0 or "--help" in action.option_strings:
                continue
@@ -169,7 +188,7 @@ def create_parser(add_cli_args, **kwargs) -> FlexibleArgumentParser:
        # Auto-mock runtime imports
        if tb_list := traceback.extract_tb(e.__traceback__):
            path = Path(tb_list[-1].filename).relative_to(ROOT_DIR)
-            auto_mock(module=".".join(path.parent.parts), attr=path.stem)
+            auto_mock(module_name=".".join(path.parent.parts), attr=path.stem)
            return create_parser(add_cli_args, **kwargs)
        else:
            raise e
@@ -209,7 +228,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    # Generate documentation for each parser
    for stem, parser in parsers.items():
-        doc_path = ARGPARSE_DOC_DIR / f"{stem}.md"
+        doc_path = ARGPARSE_DOC_DIR / f"{stem}.inc.md"
        # Specify encoding for building on Windows
        with open(doc_path, "w", encoding="utf-8") as f:
            f.write(super(type(parser), parser).format_help())

--- a/docs/usage/README.md
+++ b/docs/usage/README.md
 # Using vLLM
-First, vLLM must be [installed](../getting_started/installation/) for your chosen device in either a Python or Docker environment.
+First, vLLM must be [installed](../getting_started/installation/README.md) for your chosen device in either a Python or Docker environment.
 Then, vLLM supports the following usage patterns: