Unverified commit 0ce0539d, authored by Isotr0py and committed by GitHub
Browse files

[Bugfix] Fix Llava inference with Tensor Parallelism. (#3883)

parent 2f192835
...@@ -154,6 +154,7 @@ class RayGPUExecutor(ExecutorBase): ...@@ -154,6 +154,7 @@ class RayGPUExecutor(ExecutorBase):
scheduler_config = copy.deepcopy(self.scheduler_config) scheduler_config = copy.deepcopy(self.scheduler_config)
device_config = copy.deepcopy(self.device_config) device_config = copy.deepcopy(self.device_config)
lora_config = copy.deepcopy(self.lora_config) lora_config = copy.deepcopy(self.lora_config)
vision_language_config = copy.deepcopy(self.vision_language_config)
kv_cache_dtype = self.cache_config.cache_dtype kv_cache_dtype = self.cache_config.cache_dtype
# Initialize the actual workers with the Worker class. # Initialize the actual workers with the Worker class.
...@@ -172,6 +173,7 @@ class RayGPUExecutor(ExecutorBase): ...@@ -172,6 +173,7 @@ class RayGPUExecutor(ExecutorBase):
rank, rank,
distributed_init_method, distributed_init_method,
lora_config=lora_config, lora_config=lora_config,
vision_language_config=vision_language_config,
kv_cache_dtype=kv_cache_dtype, kv_cache_dtype=kv_cache_dtype,
)) ))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment