Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
584f0ae4
Unverified
Commit
584f0ae4
authored
Dec 20, 2024
by
Ricky Xu
Committed by
GitHub
Dec 21, 2024
Browse files
[V1] Make AsyncLLMEngine v1-v0 opaque (#11383)
Signed-off-by: Ricky Xu <xuchen727@hotmail.com>
parent
51ff216d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 9 additions and 10 deletions
+9
-10
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+7
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+1
-5
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+1
-5
No files found.
vllm/engine/async_llm_engine.py
View file @
584f0ae4
...
...
@@ -1256,3 +1256,10 @@ class AsyncLLMEngine(EngineClient):
self
.
engine
.
model_executor
.
stop_profile
()
else
:
self
.
engine
.
model_executor
.
_run_workers
(
"stop_profile"
)
# TODO(v1): Remove this class proxy when V1 goes default.
if
envs
.
VLLM_USE_V1
:
from
vllm.v1.engine.async_llm
import
AsyncLLM
AsyncLLMEngine
=
AsyncLLM
# type: ignore
vllm/entrypoints/openai/api_server.py
View file @
584f0ae4
...
...
@@ -27,6 +27,7 @@ from typing_extensions import assert_never
import
vllm.envs
as
envs
from
vllm.config
import
ModelConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
# type: ignore
from
vllm.engine.multiprocessing.client
import
MQLLMEngineClient
from
vllm.engine.multiprocessing.engine
import
run_mp_engine
from
vllm.engine.protocol
import
EngineClient
...
...
@@ -66,11 +67,6 @@ from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address
)
from
vllm.version
import
__version__
as
VLLM_VERSION
if
envs
.
VLLM_USE_V1
:
from
vllm.v1.engine.async_llm
import
AsyncLLMEngine
# type: ignore
else
:
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
# type: ignore
TIMEOUT_KEEP_ALIVE
=
5
# seconds
prometheus_multiproc_dir
:
tempfile
.
TemporaryDirectory
...
...
vllm/v1/engine/async_llm.py
View file @
584f0ae4
...
...
@@ -98,7 +98,7 @@ class AsyncLLM(EngineClient):
start_engine_loop
:
bool
=
True
,
usage_context
:
UsageContext
=
UsageContext
.
ENGINE_CONTEXT
,
stat_loggers
:
Optional
[
Dict
[
str
,
StatLoggerBase
]]
=
None
,
)
->
"AsyncLLMEngine"
:
)
->
"AsyncLLM"
:
"""Create an AsyncLLM from the EngineArgs."""
# Create the engine configs.
...
...
@@ -386,7 +386,3 @@ class AsyncLLM(EngineClient):
@
property
def
dead_error
(
self
)
->
BaseException
:
return
Exception
()
# TODO: implement
# Retain V0 name for backwards compatibility.
AsyncLLMEngine
=
AsyncLLM
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment