[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)

Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>

[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)
Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
53d2420b · Daniele · GitHub · 9db78f34 · 53d2420b
Unverified commit 53d2420b, authored Dec 10, 2025 by Daniele; committed by GitHub on Dec 10, 2025
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 2 deletions

vllm/v1/worker/tpu_worker.py vllm/v1/worker/tpu_worker.py +3 -2

No files found.
--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -10,7 +10,7 @@ import torch
 import torch.nn as nn

 import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed import (
    ensure_model_parallel_initialized,
    init_distributed_environment,
@@ -207,6 +207,7 @@ class TPUWorker:
        # one compiled bytecode. Having one FX graph/cached bytecode per
        # compiled model is required for `support_torch_compile` decorator to
        # skip dynamo guard.
-        self.model_runner.reset_dynamo_cache()
+        with set_current_vllm_config(self.vllm_config):
+            self.model_runner.reset_dynamo_cache()

        # Get the maximum amount of memory used by the model weights and