Unverified commit 53d2420b, authored by Daniele, committed by GitHub
Browse files

[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)


Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
parent 9db78f34
......@@ -10,7 +10,7 @@ import torch
import torch.nn as nn
import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
from vllm.distributed import (
ensure_model_parallel_initialized,
init_distributed_environment,
......@@ -207,6 +207,7 @@ class TPUWorker:
# one compiled bytecode. Having one FX graph/cached bytecode per
# compiled model is required for `support_torch_compile` decorator to
# skip dynamo guard.
with set_current_vllm_config(self.vllm_config):
    self.model_runner.reset_dynamo_cache()
# Get the maximum amount of memory used by the model weights and
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment