[Bugfix] Fix torch.compile() error when using MultiprocessingGPUExecutor (#5229)

a58f24e5 · zifeitong · GitHub · f42a006b · a58f24e5
Unverified commit a58f24e5, authored Jun 03, 2024 by zifeitong and committed by GitHub Jun 03, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 0 deletions

vllm/executor/multiproc_gpu_executor.py (+3 -0)

No files found.
--- a/vllm/executor/multiproc_gpu_executor.py
+++ b/vllm/executor/multiproc_gpu_executor.py
@@ -34,6 +34,9 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
        # Ensure that VLLM_INSTANCE_ID is set, to be inherited by workers
        os.environ["VLLM_INSTANCE_ID"] = get_vllm_instance_id()
+        # Disable torch async compiling which won't work with daemonic processes
+        os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
        from torch.cuda import device_count
        assert world_size <= device_count(), (
            "please set tensor_parallel_size to less than max local gpu count")