Unverified Commit 52df63b7 authored by Baber Abbasi, committed by GitHub

fix vllm (#2708)

* fix vllm

* fix data_parallel

* copy to multimodal
parent 41b952f3
@@ -109,7 +109,7 @@ class VLLM(TemplateLM):
             eval_logger.warning(
                 "You might experience occasional issues with model weight downloading when data_parallel is in use. To ensure stable performance, run with data_parallel_size=1 until the weights are downloaded and cached."
             )
-            self.model_args["worker_use_ray"] = True
+            self.model_args["distributed_executor_backend"] = "ray"
             self.batch_size = "auto"
             eval_logger.info("Manual batching is not compatible with data parallelism.")
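
This hunk swaps the deprecated `worker_use_ray` boolean for vLLM's `distributed_executor_backend` engine argument. A minimal sketch of how the updated `model_args` would reach vLLM (the model name is a placeholder, and this assumes a vLLM version that accepts `distributed_executor_backend`):

```python
# Hypothetical illustration, not the harness's actual code: the model_args
# dict assembled above is eventually unpacked into vLLM's LLM constructor.
from vllm import LLM

model_args = {
    "model": "facebook/opt-125m",  # placeholder model id
    "tensor_parallel_size": 1,
    # replaces the removed `worker_use_ray=True` flag
    "distributed_executor_backend": "ray",
}
llm = LLM(**model_args)
```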
@@ -246,9 +246,7 @@ class VLLM(TemplateLM):
             # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
             # also seems to only work with decorator and not with ray.remote() fn
             # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            @ray.remote
+            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
             def run_inference_one_model(
                 model_args: dict,
                 sampling_params,
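
For context, here is a sketch of the data-parallel dispatch pattern this decorator supports: each Ray task builds its own vLLM engine and handles one shard of the requests. Names like `shards` are illustrative rather than the harness's exact code, and the `num_gpus` expression mirrors the change above (pin one GPU per replica only when `tensor_parallel_size == 1`; otherwise leave placement to vLLM's Ray backend, which hangs if resources are pinned here):

```python
import ray
from vllm import LLM

tensor_parallel_size = 1  # stand-in for self.tensor_parallel_size

# Reserve one GPU per replica only without tensor parallelism; with
# tensor_parallel > 1, vLLM's Ray executor manages GPU placement itself.
@ray.remote(num_gpus=1 if tensor_parallel_size == 1 else None)
def run_inference_one_model(model_args: dict, sampling_params, requests):
    # requests: a list of prompt strings in this simplified sketch
    llm = LLM(**model_args)
    return llm.generate(requests, sampling_params=sampling_params)

# One task per data-parallel shard of the request list (illustrative):
# results = ray.get([
#     run_inference_one_model.remote(model_args, sampling_params, shard)
#     for shard in shards
# ])
```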
@@ -109,9 +109,7 @@ class VLLM_VLM(VLLM):
             # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
             # also seems to only work with decorator and not with ray.remote() fn
             # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            @ray.remote
+            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
             def run_inference_one_model(
                 model_args: dict, sampling_params, requests: List[List[dict]]
             ):