gaoqiong / lm-evaluation-harness · Commit 52df63b7 (unverified)
Authored Feb 17, 2025 by Baber Abbasi; committed via GitHub on Feb 18, 2025
Parent: 41b952f3

fix vllm (#2708)

* fix vllm
* fix data_parallel
* copy to multimodal
Showing 2 changed files, with 3 additions and 7 deletions:

  lm_eval/models/vllm_causallms.py  +2 -4
  lm_eval/models/vllm_vlms.py       +1 -3
lm_eval/models/vllm_causallms.py

@@ -109,7 +109,7 @@ class VLLM(TemplateLM):
             eval_logger.warning(
                 "You might experience occasional issues with model weight downloading when data_parallel is in use. To ensure stable performance, run with data_parallel_size=1 until the weights are downloaded and cached."
             )
-            self.model_args["worker_use_ray"] = True
+            self.model_args["distributed_executor_backend"] = "ray"
             self.batch_size = "auto"
             eval_logger.info("Manual batching is not compatible with data parallelism.")
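Why the rename matters: recent vLLM releases removed the worker_use_ray engine argument, and distributed_executor_backend="ray" is its replacement. A minimal sketch, not from this commit (the model name and kwargs dict are illustrative), of how a harness-style model_args dict reaches the engine:

    # Minimal sketch (assumptions: vllm and ray installed, one visible GPU;
    # the model choice is illustrative, not taken from the commit).
    from vllm import LLM

    model_args = {"model": "facebook/opt-125m"}

    # Old spelling, removed in recent vLLM -- the engine rejects it as an
    # unexpected keyword argument:
    # model_args["worker_use_ray"] = True

    # New spelling used by the commit: run the engine's workers through Ray.
    model_args["distributed_executor_backend"] = "ray"

    llm = LLM(**model_args)

With the old key, a current vLLM fails before the engine ever starts; the rename keeps Ray as the executor for data-parallel runs while leaving the rest of model_args untouched.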
@@ -246,9 +246,7 @@ class VLLM(TemplateLM):
             # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
             # also seems to only work with decorator and not with ray.remote() fn
             # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            @ray.remote
+            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
             def run_inference_one_model(
                 model_args: dict,
                 sampling_params,
                 ...
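The decorator change is the substantive data_parallel fix: reserve one GPU per Ray task only when each data-parallel replica is a single-GPU engine (tensor_parallel_size == 1), and pass num_gpus=None otherwise so Ray does not fence off devices that vLLM's own tensor-parallel workers need to share. A self-contained sketch of that resource pattern, with a stub task standing in for a real engine; ray.init(num_gpus=2) advertises logical GPUs, so the sketch runs even on a CPU-only machine:

    import ray

    tensor_parallel_size = 1  # illustrative; the harness takes this from its model args

    # Reserve a GPU per task only for single-GPU replicas, mirroring the
    # decorator added in the hunk above.
    @ray.remote(num_gpus=1 if tensor_parallel_size == 1 else None)
    def run_inference_one_model(model_args: dict, requests: list) -> int:
        # A real task would build an engine from model_args and generate;
        # returning the shard size keeps the sketch runnable anywhere.
        return len(requests)

    ray.init(num_gpus=2)  # logical GPU count; no hardware needed for the sketch
    futures = [run_inference_one_model.remote({"replica": i}, ["a", "b"]) for i in range(2)]
    print(ray.get(futures))  # [2, 2]
    ray.shutdown()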
lm_eval/models/vllm_vlms.py

@@ -109,9 +109,7 @@ class VLLM_VLM(VLLM):
             # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
             # also seems to only work with decorator and not with ray.remote() fn
             # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            @ray.remote
+            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
             def run_inference_one_model(
                 model_args: dict,
                 sampling_params,
                 requests: List[List[dict]],
             ):
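The multimodal class receives the same decorator ("copy to multimodal" in the commit message); here requests is typed List[List[dict]] because each request carries text plus image data. For context, a hypothetical dispatch-and-gather loop showing how such a decorated task is typically driven; this is illustrative, not the harness's exact code:

    import ray

    def dispatch(run_inference_one_model, model_args: dict, sampling_params,
                 shards: list) -> list:
        # One Ray task per data-parallel shard; Ray places each task on a
        # free (possibly logical) GPU according to the decorator's num_gpus.
        object_refs = [
            run_inference_one_model.remote(model_args, sampling_params, shard)
            for shard in shards
        ]
        results = ray.get(object_refs)  # block until every replica finishes
        ray.shutdown()                  # start clean on the next call (illustrative choice)
        return [out for shard_out in results for out in shard_out]  # flatten shards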