Commit 464dd985 (Unverified)
Authored Dec 03, 2023 by Woosuk Kwon; committed by GitHub on Dec 03, 2023

Fix num_gpus when TP > 1 (#1852)
Parent: c07a4428

Showing 2 changed files with 15 additions and 2 deletions:

    vllm/engine/async_llm_engine.py  +10 -1
    vllm/engine/llm_engine.py        +5  -1
vllm/engine/async_llm_engine.py

@@ -301,7 +301,16 @@ class AsyncLLMEngine:
         elif self.worker_use_ray:
             engine_class = ray.remote(num_cpus=0)(self._engine_class).remote
         else:
-            engine_class = ray.remote(num_gpus=1)(self._engine_class).remote
+            # FIXME(woosuk): This is a bit hacky. Be careful when changing the
+            # order of the arguments.
+            cache_config = args[1]
+            parallel_config = args[2]
+            if parallel_config.tensor_parallel_size == 1:
+                num_gpus = cache_config.gpu_memory_utilization
+            else:
+                num_gpus = 1
+            engine_class = ray.remote(num_gpus=num_gpus)(
+                self._engine_class).remote
         return engine_class(*args, **kwargs)

     async def engine_step(self) -> bool:
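For context on the Ray idiom in this hunk: ray.remote(...) called with resource options returns a decorator that turns a plain class into an actor class, and num_gpus may be fractional, letting Ray schedule the actor onto a partially used device. A minimal sketch of the pattern, with an illustrative EngineActor class and a 0.9 fraction standing in for self._engine_class and gpu_memory_utilization (this is not vLLM code):

import ray

ray.init()

class EngineActor:
    """Illustrative stand-in for vLLM's engine class."""

    def ping(self) -> str:
        return "ok"

# Same shape as the hunk above: wrap a plain class as a Ray actor and
# request a fractional GPU. With num_gpus=0.9, Ray leaves headroom on
# the device for another process; with num_gpus=1 the actor claims the
# whole GPU. Scheduling blocks until a GPU node is available.
engine_class = ray.remote(num_gpus=0.9)(EngineActor).remote
engine = engine_class()
print(ray.get(engine.ping.remote()))  # -> "ok"

Note how the diff stores the bound .remote handle in engine_class, so the final return engine_class(*args, **kwargs) works identically whether the engine is constructed locally or as a Ray actor.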
vllm/engine/llm_engine.py

@@ -159,9 +159,13 @@ class LLMEngine:
         for bundle in placement_group.bundle_specs:
             if not bundle.get("GPU", 0):
                 continue
+            if self.parallel_config.tensor_parallel_size == 1:
+                num_gpus = self.cache_config.gpu_memory_utilization
+            else:
+                num_gpus = 1
             worker = ray.remote(
                 num_cpus=0,
-                num_gpus=self.cache_config.gpu_memory_utilization,
+                num_gpus=num_gpus,
                 scheduling_strategy=PlacementGroupSchedulingStrategy(
                     placement_group=placement_group,
                     placement_group_capture_child_tasks=True),
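Why a full GPU per worker when TP > 1? With a fractional num_gpus, Ray may co-locate two workers on one device, which is presumably the bug this commit fixes: tensor-parallel workers each need their own GPU. A self-contained sketch of the placement-group scheduling pattern the hunk uses, assuming a hypothetical Worker class and tp_size = 2 (neither is vLLM code, and running it needs two GPUs):

import ray
from ray.util.placement_group import placement_group
from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy

ray.init()

class Worker:
    """Illustrative stand-in for vLLM's tensor-parallel worker."""

    def ping(self) -> str:
        return "ok"

tp_size = 2  # assumed tensor_parallel_size for this sketch

# One full-GPU bundle per tensor-parallel rank.
pg = placement_group([{"GPU": 1}] * tp_size)
ray.get(pg.ready())

# Mirrors the hunk above: num_gpus=1 per worker when TP > 1, so no two
# workers can be packed onto the same device.
workers = [
    ray.remote(
        num_cpus=0,
        num_gpus=1,
        scheduling_strategy=PlacementGroupSchedulingStrategy(
            placement_group=pg,
            placement_group_capture_child_tasks=True),
    )(Worker).remote()
    for _ in range(tp_size)
]
print(ray.get([w.ping.remote() for w in workers]))  # -> ["ok", "ok"]

When tensor_parallel_size == 1 the fractional value (gpu_memory_utilization) is kept, which presumably lets the engine and its single worker share one device.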