[Model Runner V2] Init cuda graph pool when necessary (#33217)

Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com>

[Model Runner V2] Init cuda graph pool when necessary (#33217)
Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com>
ffb3d553 · Xinyu Chen · GitHub · fa7e0bfa · ffb3d553 · ffb3d553
Unverified commit ffb3d553, authored Feb 12, 2026 by Xinyu Chen; committed by GitHub Feb 11, 2026
Showing 2 changed files with 6 additions and 2 deletions

vllm/v1/worker/gpu/cudagraph_utils.py +3 -1

vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py +3 -1

No files found.
--- a/vllm/v1/worker/gpu/cudagraph_utils.py
+++ b/vllm/v1/worker/gpu/cudagraph_utils.py
@@ -45,7 +45,9 @@ class CudaGraphManager:
        )
        self.graphs: dict[int, torch.cuda.CUDAGraph] = {}
-        self.pool = torch.cuda.graph_pool_handle()
+        self.pool = None
+        if self.cudagraph_mode != CUDAGraphMode.NONE:
+            self.pool = torch.cuda.graph_pool_handle()
        self.hidden_states: torch.Tensor | None = None
    def needs_capture(self) -> bool:

--- a/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py
+++ b/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py
@@ -44,7 +44,9 @@ class EagleCudaGraphManager:
        )
        self.graphs: dict[int, torch.cuda.CUDAGraph] = {}
-        self.pool = torch.cuda.graph_pool_handle()
+        self.pool = None
+        if self.cudagraph_mode != CUDAGraphMode.NONE:
+            self.pool = torch.cuda.graph_pool_handle()
    def get_cudagraph_size(self, num_tokens: int) -> int | None:
        return self.cudagraph_sizes.get(num_tokens)