[Bugfix] Fix Llava inference with Tensor Parallelism. (#3883)

0ce0539d · Isotr0py · GitHub · 2f192835 · 0ce0539d
Unverified commit 0ce0539d, authored Apr 07, 2024 by Isotr0py; committed by GitHub on Apr 07, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 0 deletions

vllm/executor/ray_gpu_executor.py +2 -0

No files found.
--- a/vllm/executor/ray_gpu_executor.py
+++ b/vllm/executor/ray_gpu_executor.py
@@ -154,6 +154,7 @@ class RayGPUExecutor(ExecutorBase):
        scheduler_config = copy.deepcopy(self.scheduler_config)
        device_config = copy.deepcopy(self.device_config)
        lora_config = copy.deepcopy(self.lora_config)
+        vision_language_config = copy.deepcopy(self.vision_language_config)
        kv_cache_dtype = self.cache_config.cache_dtype

        # Initialize the actual workers with the Worker class.
@@ -172,6 +173,7 @@ class RayGPUExecutor(ExecutorBase):
                    rank,
                    distributed_init_method,
                    lora_config=lora_config,
+                    vision_language_config=vision_language_config,
                    kv_cache_dtype=kv_cache_dtype,
                ))