Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
022c5c69
Unverified
Commit
022c5c69
authored
Jan 05, 2025
by
Rui Qiao
Committed by
GitHub
Jan 06, 2025
Browse files
[V1] Refactor get_executor_cls (#11754)
parent
f8fcca10
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
26 additions
and
46 deletions
+26
-46
tests/v1/engine/test_engine_core.py
tests/v1/engine/test_engine_core.py
+3
-3
tests/v1/engine/test_engine_core_client.py
tests/v1/engine/test_engine_core_client.py
+3
-3
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+1
-20
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+1
-19
vllm/v1/executor/abstract.py
vllm/v1/executor/abstract.py
+18
-1
No files found.
tests/v1/engine/test_engine_core.py
View file @
022c5c69
...
...
@@ -8,8 +8,8 @@ from vllm import SamplingParams
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.platforms
import
current_platform
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.core
import
EngineCore
from
vllm.v1.executor.abstract
import
Executor
if
not
current_platform
.
is_cuda
():
pytest
.
skip
(
reason
=
"V1 currently only supported on CUDA."
,
...
...
@@ -43,7 +43,7 @@ def test_engine_core(monkeypatch):
"""Setup the EngineCore."""
engine_args
=
EngineArgs
(
model
=
MODEL_NAME
)
vllm_config
=
engine_args
.
create_engine_config
()
executor_class
=
AsyncLLM
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
engine_core
=
EngineCore
(
vllm_config
=
vllm_config
,
executor_class
=
executor_class
)
...
...
@@ -149,7 +149,7 @@ def test_engine_core_advanced_sampling(monkeypatch):
"""Setup the EngineCore."""
engine_args
=
EngineArgs
(
model
=
MODEL_NAME
)
vllm_config
=
engine_args
.
create_engine_config
()
executor_class
=
AsyncLLM
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
engine_core
=
EngineCore
(
vllm_config
=
vllm_config
,
executor_class
=
executor_class
)
...
...
tests/v1/engine/test_engine_core_client.py
View file @
022c5c69
...
...
@@ -11,8 +11,8 @@ from vllm.engine.arg_utils import EngineArgs
from
vllm.platforms
import
current_platform
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.executor.abstract
import
Executor
if
not
current_platform
.
is_cuda
():
pytest
.
skip
(
reason
=
"V1 currently only supported on CUDA."
,
...
...
@@ -84,7 +84,7 @@ def test_engine_core_client(monkeypatch, multiprocessing_mode: bool):
engine_args
=
EngineArgs
(
model
=
MODEL_NAME
,
compilation_config
=
3
)
vllm_config
=
engine_args
.
create_engine_config
(
UsageContext
.
UNKNOWN_CONTEXT
)
executor_class
=
AsyncLLM
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
client
=
EngineCoreClient
.
make_client
(
multiprocess_mode
=
multiprocessing_mode
,
asyncio_mode
=
False
,
...
...
@@ -152,7 +152,7 @@ async def test_engine_core_client_asyncio(monkeypatch):
engine_args
=
EngineArgs
(
model
=
MODEL_NAME
)
vllm_config
=
engine_args
.
create_engine_config
(
usage_context
=
UsageContext
.
UNKNOWN_CONTEXT
)
executor_class
=
AsyncLLM
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
client
=
EngineCoreClient
.
make_client
(
multiprocess_mode
=
True
,
asyncio_mode
=
True
,
...
...
vllm/v1/engine/async_llm.py
View file @
022c5c69
...
...
@@ -22,7 +22,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
from
vllm.v1.engine.detokenizer
import
Detokenizer
from
vllm.v1.engine.processor
import
Processor
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.ray_utils
import
initialize_ray_cluster
logger
=
init_logger
(
__name__
)
...
...
@@ -105,7 +104,7 @@ class AsyncLLM(EngineClient):
else
:
vllm_config
=
engine_config
executor_class
=
cls
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
# Create the AsyncLLM.
return
cls
(
...
...
@@ -127,24 +126,6 @@ class AsyncLLM(EngineClient):
if
handler
:
=
getattr
(
self
,
"output_handler"
,
None
):
handler
.
cancel
()
@
classmethod
def
_get_executor_cls
(
cls
,
vllm_config
:
VllmConfig
)
->
Type
[
Executor
]:
executor_class
:
Type
[
Executor
]
distributed_executor_backend
=
(
vllm_config
.
parallel_config
.
distributed_executor_backend
)
if
distributed_executor_backend
==
"ray"
:
initialize_ray_cluster
(
vllm_config
.
parallel_config
)
from
vllm.v1.executor.ray_executor
import
RayExecutor
executor_class
=
RayExecutor
elif
distributed_executor_backend
==
"mp"
:
from
vllm.v1.executor.multiproc_executor
import
MultiprocExecutor
executor_class
=
MultiprocExecutor
else
:
assert
(
distributed_executor_backend
is
None
)
from
vllm.v1.executor.uniproc_executor
import
UniprocExecutor
executor_class
=
UniprocExecutor
return
executor_class
async
def
add_request
(
self
,
request_id
:
str
,
...
...
vllm/v1/engine/llm_engine.py
View file @
022c5c69
...
...
@@ -89,7 +89,7 @@ class LLMEngine:
# Create the engine configs.
vllm_config
=
engine_args
.
create_engine_config
(
usage_context
)
executor_class
=
cls
.
_get_executor_cls
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
if
VLLM_ENABLE_V1_MULTIPROCESSING
:
logger
.
debug
(
"Enabling multiprocessing for LLMEngine."
)
...
...
@@ -103,24 +103,6 @@ class LLMEngine:
stat_loggers
=
stat_loggers
,
multiprocess_mode
=
enable_multiprocessing
)
@
classmethod
def
_get_executor_cls
(
cls
,
vllm_config
:
VllmConfig
)
->
Type
[
Executor
]:
executor_class
:
Type
[
Executor
]
distributed_executor_backend
=
(
vllm_config
.
parallel_config
.
distributed_executor_backend
)
if
distributed_executor_backend
==
"ray"
:
from
vllm.v1.executor.ray_executor
import
RayExecutor
executor_class
=
RayExecutor
elif
distributed_executor_backend
==
"mp"
:
from
vllm.v1.executor.multiproc_executor
import
MultiprocExecutor
executor_class
=
MultiprocExecutor
else
:
assert
(
distributed_executor_backend
is
None
)
from
vllm.v1.executor.uniproc_executor
import
UniprocExecutor
executor_class
=
UniprocExecutor
return
executor_class
def
get_num_unfinished_requests
(
self
)
->
int
:
return
self
.
detokenizer
.
get_num_unfinished_requests
()
...
...
vllm/v1/executor/abstract.py
View file @
022c5c69
from
abc
import
ABC
,
abstractmethod
from
typing
import
Tuple
from
typing
import
Tuple
,
Type
from
vllm.config
import
VllmConfig
from
vllm.v1.outputs
import
ModelRunnerOutput
...
...
@@ -8,6 +8,23 @@ from vllm.v1.outputs import ModelRunnerOutput
class
Executor
(
ABC
):
"""Abstract class for executors."""
@staticmethod
def get_class(vllm_config: VllmConfig) -> Type["Executor"]:
    """Return the Executor subclass selected by the parallel config.

    Reads ``vllm_config.parallel_config.distributed_executor_backend``
    and maps it to an implementation:

    * ``"ray"`` -> ``RayExecutor``
    * ``"mp"``  -> ``MultiprocExecutor``
    * ``None``  -> ``UniprocExecutor``

    Args:
        vllm_config: Engine configuration; only
            ``parallel_config.distributed_executor_backend`` is read.

    Returns:
        The executor class (not an instance) for the configured backend.

    Raises:
        ValueError: If the configured backend is none of the values
            listed above.
    """
    backend = vllm_config.parallel_config.distributed_executor_backend

    # Each implementation is imported inside the branch that selects it,
    # so a call only imports the module of the backend actually chosen.
    if backend == "ray":
        from vllm.v1.executor.ray_executor import RayExecutor
        return RayExecutor
    if backend == "mp":
        from vllm.v1.executor.multiproc_executor import MultiprocExecutor
        return MultiprocExecutor
    if backend is None:
        from vllm.v1.executor.uniproc_executor import UniprocExecutor
        return UniprocExecutor

    # Raise explicitly instead of using `assert`: assertions are removed
    # under `python -O`, which would let an unsupported backend silently
    # fall through to the single-process executor.
    raise ValueError(
        f"Unknown distributed executor backend: {backend!r}")
@
abstractmethod
def
__init__
(
self
,
vllm_config
:
VllmConfig
)
->
None
:
raise
NotImplementedError
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment