Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
970dfdc0
"vllm/model_executor/models/internlm.py" did not exist on "707ec647bb3a5018e5d8cdded409d6244bbb9ea5"
Unverified
Commit
970dfdc0
authored
Aug 21, 2024
by
Robert Shaw
Committed by
GitHub
Aug 21, 2024
Browse files
[Frontend] Improve Startup Failure UX (#7716)
parent
91f4522c
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
37 additions
and
19 deletions
+37
-19
tests/entrypoints/openai/test_mp_api_server.py
tests/entrypoints/openai/test_mp_api_server.py
+16
-13
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+21
-6
No files found.
tests/entrypoints/openai/test_mp_api_server.py
View file @
970dfdc0
import
time
import
pytest
from
vllm.entrypoints.openai.api_server
import
build_async_engine_client
...
...
@@ -8,19 +10,20 @@ from vllm.utils import FlexibleArgumentParser
@
pytest
.
mark
.
asyncio
async
def
test_mp_crash_detection
():
with
pytest
.
raises
(
RuntimeError
)
as
excinfo
:
parser
=
FlexibleArgumentParser
(
description
=
"vLLM's remote OpenAI server."
)
parser
=
FlexibleArgumentParser
(
description
=
"vLLM's remote OpenAI server."
)
parser
=
make_arg_parser
(
parser
)
args
=
parser
.
parse_args
([])
# use an invalid tensor_parallel_size to trigger the
# error in the server
args
.
tensor_parallel_size
=
65536
start
=
time
.
perf_counter
()
async
with
build_async_engine_client
(
args
):
pass
assert
"The server process died before responding to the readiness probe"
\
in
str
(
excinfo
.
value
)
end
=
time
.
perf_counter
()
assert
end
-
start
<
60
,
(
"Expected vLLM to gracefully shutdown in <60s "
"if there is an error in the startup."
)
@
pytest
.
mark
.
asyncio
...
...
vllm/entrypoints/openai/api_server.py
View file @
970dfdc0
...
...
@@ -8,7 +8,7 @@ import tempfile
from
argparse
import
Namespace
from
contextlib
import
asynccontextmanager
from
http
import
HTTPStatus
from
typing
import
AsyncIterator
,
Set
from
typing
import
AsyncIterator
,
Optional
,
Set
from
fastapi
import
APIRouter
,
FastAPI
,
Request
from
fastapi.exceptions
import
RequestValidationError
...
...
@@ -60,6 +60,7 @@ openai_serving_embedding: OpenAIServingEmbedding
openai_serving_tokenization
:
OpenAIServingTokenization
prometheus_multiproc_dir
:
tempfile
.
TemporaryDirectory
# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
logger
=
init_logger
(
'vllm.entrypoints.openai.api_server'
)
_running_tasks
:
Set
[
asyncio
.
Task
]
=
set
()
...
...
@@ -94,7 +95,15 @@ async def lifespan(app: FastAPI):
@
asynccontextmanager
async
def
build_async_engine_client
(
args
:
Namespace
)
->
AsyncIterator
[
AsyncEngineClient
]:
args
:
Namespace
)
->
AsyncIterator
[
Optional
[
AsyncEngineClient
]]:
"""
Create AsyncEngineClient, either:
- in-process using the AsyncLLMEngine Directly
- multiprocess using AsyncLLMEngine RPC
Returns the Client or None if the creation failed.
"""
# Context manager to handle async_engine_client lifecycle
# Ensures everything is shutdown and cleaned up on error/exit
global
engine_args
...
...
@@ -157,11 +166,13 @@ async def build_async_engine_client(
try
:
await
rpc_client
.
setup
()
break
except
TimeoutError
as
e
:
except
TimeoutError
:
if
not
rpc_server_process
.
is_alive
():
raise
RuntimeError
(
"The server process died before "
"responding to the readiness probe"
)
from
e
logger
.
error
(
"RPCServer process died before responding "
"to readiness probe"
)
yield
None
return
yield
async_engine_client
finally
:
...
...
@@ -410,6 +421,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
logger
.
info
(
"args: %s"
,
args
)
async
with
build_async_engine_client
(
args
)
as
async_engine_client
:
# If None, creation of the client failed and we exit.
if
async_engine_client
is
None
:
return
app
=
await
init_app
(
async_engine_client
,
args
)
shutdown_task
=
await
serve_http
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment