Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
91dd8f7a
Unverified
Commit
91dd8f7a
authored
Feb 08, 2025
by
youkaichao
Committed by
GitHub
Feb 08, 2025
Browse files
[bugfix] respect distributed_executor_backend in world_size=1 (#12934)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
d01f66b0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
53 additions
and
32 deletions
+53
-32
tests/engine/test_executor.py
tests/engine/test_executor.py
+20
-1
vllm/config.py
vllm/config.py
+3
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+22
-22
vllm/v1/executor/abstract.py
vllm/v1/executor/abstract.py
+8
-9
No files found.
tests/engine/test_
custom_
executor.py
→
tests/engine/test_executor.py
View file @
91dd8f7a
...
...
@@ -55,6 +55,7 @@ def test_custom_executor(model, tmp_path):
engine_args
=
EngineArgs
(
model
=
model
,
distributed_executor_backend
=
CustomUniExecutor
,
enforce_eager
=
True
,
# reduce test time
)
engine
=
LLMEngine
.
from_engine_args
(
engine_args
)
sampling_params
=
SamplingParams
(
max_tokens
=
1
)
...
...
@@ -75,7 +76,10 @@ def test_custom_executor_async(model, tmp_path):
assert
not
os
.
path
.
exists
(
".marker"
)
engine_args
=
AsyncEngineArgs
(
model
=
model
,
distributed_executor_backend
=
CustomUniExecutorAsync
)
model
=
model
,
distributed_executor_backend
=
CustomUniExecutorAsync
,
enforce_eager
=
True
,
# reduce test time
)
engine
=
AsyncLLMEngine
.
from_engine_args
(
engine_args
)
sampling_params
=
SamplingParams
(
max_tokens
=
1
)
...
...
@@ -89,3 +93,18 @@ def test_custom_executor_async(model, tmp_path):
assert
os
.
path
.
exists
(
".marker"
)
finally
:
os
.
chdir
(
cwd
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"facebook/opt-125m"
])
def
test_respect_ray
(
model
):
# even for TP=1 and PP=1,
# if users specify ray, we should use ray.
# users might do this if they want to manage the
# resources using ray.
engine_args
=
EngineArgs
(
model
=
model
,
distributed_executor_backend
=
"ray"
,
enforce_eager
=
True
,
# reduce test time
)
engine
=
LLMEngine
.
from_engine_args
(
engine_args
)
assert
engine
.
model_executor
.
uses_ray
vllm/config.py
View file @
91dd8f7a
...
...
@@ -1401,6 +1401,9 @@ class ParallelConfig:
logger
.
info
(
"Defaulting to use %s for distributed inference"
,
backend
)
if
self
.
distributed_executor_backend
is
None
and
self
.
world_size
==
1
:
self
.
distributed_executor_backend
=
"uni"
self
.
_verify_args
()
@
property
...
...
vllm/engine/llm_engine.py
View file @
91dd8f7a
...
...
@@ -434,6 +434,7 @@ class LLMEngine:
@
classmethod
def
_get_executor_cls
(
cls
,
engine_config
:
VllmConfig
)
->
Type
[
ExecutorBase
]:
# distributed_executor_backend must be set in VllmConfig.__post_init__
distributed_executor_backend
=
(
engine_config
.
parallel_config
.
distributed_executor_backend
)
# Initialize the cluster and specify the executor class.
...
...
@@ -443,30 +444,29 @@ class LLMEngine:
"distributed_executor_backend must be a subclass of "
f
"ExecutorBase. Got
{
distributed_executor_backend
}
."
)
executor_class
=
distributed_executor_backend
elif
engine_config
.
parallel_config
.
world_size
>
1
:
if
distributed_executor_backend
==
"ray"
:
from
vllm.executor.ray_distributed_executor
import
(
RayDistributedExecutor
)
executor_class
=
RayDistributedExecutor
elif
distributed_executor_backend
==
"mp"
:
from
vllm.executor.mp_distributed_executor
import
(
MultiprocessingDistributedExecutor
)
assert
not
envs
.
VLLM_USE_RAY_SPMD_WORKER
,
(
"multiprocessing distributed executor backend does not "
"support VLLM_USE_RAY_SPMD_WORKER=1"
)
executor_class
=
MultiprocessingDistributedExecutor
elif
distributed_executor_backend
==
"uni"
:
# JAX-style, single-process, multi-device executor.
from
vllm.executor.uniproc_executor
import
UniProcExecutor
executor_class
=
UniProcExecutor
elif
distributed_executor_backend
==
"external_launcher"
:
# executor with external launcher
from
vllm.executor.uniproc_executor
import
(
# noqa
ExecutorWithExternalLauncher
)
executor_class
=
ExecutorWithExternalLauncher
else
:
elif
distributed_executor_backend
==
"ray"
:
from
vllm.executor.ray_distributed_executor
import
(
RayDistributedExecutor
)
executor_class
=
RayDistributedExecutor
elif
distributed_executor_backend
==
"mp"
:
from
vllm.executor.mp_distributed_executor
import
(
MultiprocessingDistributedExecutor
)
assert
not
envs
.
VLLM_USE_RAY_SPMD_WORKER
,
(
"multiprocessing distributed executor backend does not "
"support VLLM_USE_RAY_SPMD_WORKER=1"
)
executor_class
=
MultiprocessingDistributedExecutor
elif
distributed_executor_backend
==
"uni"
:
# JAX-style, single-process, multi-device executor.
from
vllm.executor.uniproc_executor
import
UniProcExecutor
executor_class
=
UniProcExecutor
elif
distributed_executor_backend
==
"external_launcher"
:
# executor with external launcher
from
vllm.executor.uniproc_executor
import
(
# noqa
ExecutorWithExternalLauncher
)
executor_class
=
ExecutorWithExternalLauncher
else
:
raise
ValueError
(
"unrecognized distributed_executor_backend: "
f
"
{
distributed_executor_backend
}
"
)
return
executor_class
@
classmethod
...
...
vllm/v1/executor/abstract.py
View file @
91dd8f7a
...
...
@@ -25,15 +25,14 @@ class Executor(ExecutorBase):
parallel_config
=
vllm_config
.
parallel_config
distributed_executor_backend
=
(
parallel_config
.
distributed_executor_backend
)
if
distributed_executor_backend
is
None
:
# If the user does not specify the distributed executor backend,
# we will choose the backend based on the world size.
if
parallel_config
.
world_size
>
1
:
distributed_executor_backend
=
"mp"
else
:
distributed_executor_backend
=
"uni"
if
distributed_executor_backend
==
"ray"
:
# distributed_executor_backend must be set in VllmConfig.__post_init__
if
isinstance
(
distributed_executor_backend
,
type
):
if
not
issubclass
(
distributed_executor_backend
,
ExecutorBase
):
raise
TypeError
(
"distributed_executor_backend must be a subclass of "
f
"ExecutorBase. Got
{
distributed_executor_backend
}
."
)
executor_class
=
distributed_executor_backend
elif
distributed_executor_backend
==
"ray"
:
executor_class
=
RayDistributedExecutor
elif
distributed_executor_backend
==
"mp"
:
from
vllm.v1.executor.multiproc_executor
import
MultiprocExecutor
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment