[Bugfix][TPU] Set readonly=True for non-root devices (#6980)

533d1932 · Woosuk Kwon · GitHub · 9f0e69b6 · 533d1932
Unverified Commit 533d1932 authored Jul 31, 2024 by Woosuk Kwon Committed by GitHub Jul 31, 2024
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 1 deletion

vllm/worker/tpu_worker.py vllm/worker/tpu_worker.py +4 -1

No files found.
--- a/vllm/worker/tpu_worker.py
+++ b/vllm/worker/tpu_worker.py
@@ -104,7 +104,10 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
        # Use persistent cache to avoid XLA recompilation.
        # NOTE(woosuk): This does not completely eliminate the recompilation
        # overhead because dynamo does not cache the compiled results.
-        xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH, readonly=False)
+        # NOTE(woosuk): Set readonly=False only for the rank 0 process to avoid
+        # race conditions.
+        xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH,
+                            readonly=not self.is_driver_worker)

    def load_model(self):
        self.model_runner.load_model()