Add full API docs and improve the UX of navigating them (#17485)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Add full API docs and improve the UX of navigating them (#17485)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
d6484ef3 · Harry Mellor · GitHub · 46fae69c · d6484ef3
Unverified Commit d6484ef3 authored May 04, 2025 by Harry Mellor Committed by GitHub May 03, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 4 deletions

vllm/worker/xpu_worker.py vllm/worker/xpu_worker.py +5 -4

No files found.
--- a/vllm/worker/xpu_worker.py
+++ b/vllm/worker/xpu_worker.py
@@ -26,7 +26,7 @@ logger = init_logger(__name__)

 class XPUWorker(LoRANotSupportedWorkerBase, Worker):
    """A worker class that executes (a partition of) the model on a GPU.
-    
+
    Each worker is associated with a single XPU device. The worker is 
    responsible for maintaining the KV cache and executing the model on the 
    XPU. In case of distributed inference, each worker is assigned a partition
@@ -93,9 +93,10 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker):
        Then, it calculates the maximum possible number of GPU and CPU blocks
        that can be allocated with the remaining free memory.

-        .. tip::
-            You may limit the usage of GPU memory
-            by adjusting the `gpu_memory_utilization` parameter.
+        :::{tip}
+        You may limit the usage of GPU memory
+        by adjusting the `gpu_memory_utilization` parameter.
+        :::
        """
        # Profile the memory usage of the model and get the maximum number of
        # cache blocks that can be allocated with the remaining free memory.