Unverified commit 533d1932, authored by Woosuk Kwon and committed by GitHub
Browse files

[Bugfix][TPU] Set readonly=True for non-root devices (#6980)

parent 9f0e69b6
@@ -104,7 +104,10 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
         # Use persistent cache to avoid XLA recompilation.
         # NOTE(woosuk): This does not completely eliminate the recompilation
         # overhead because dynamo does not cache the compiled results.
-        xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH, readonly=False)
+        # NOTE(woosuk): Set readonly=False only for the rank 0 process to avoid
+        # race conditions.
+        xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH,
+                            readonly=not self.is_driver_worker)
 
     def load_model(self):
         self.model_runner.load_model()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment