Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ee812580
Unverified
Commit
ee812580
authored
Jul 24, 2024
by
Daniele
Committed by
GitHub
Jul 24, 2024
Browse files
[Frontend] split run_server into build_server and run_server (#6740)
parent
40468b13
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
28 deletions
+52
-28
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+50
-27
vllm/scripts.py
vllm/scripts.py
+2
-1
No files found.
vllm/entrypoints/openai/api_server.py
View file @
ee812580
...
@@ -2,6 +2,7 @@ import asyncio
...
@@ -2,6 +2,7 @@ import asyncio
import
importlib
import
importlib
import
inspect
import
inspect
import
re
import
re
import
signal
from
contextlib
import
asynccontextmanager
from
contextlib
import
asynccontextmanager
from
http
import
HTTPStatus
from
http
import
HTTPStatus
from
typing
import
Optional
,
Set
from
typing
import
Optional
,
Set
...
@@ -213,12 +214,13 @@ def build_app(args):
...
@@ -213,12 +214,13 @@ def build_app(args):
return
app
return
app
def
run_server
(
args
,
llm_engine
=
None
):
async
def
build_server
(
args
,
llm_engine
:
Optional
[
AsyncLLMEngine
]
=
None
,
**
uvicorn_kwargs
,
)
->
uvicorn
.
Server
:
app
=
build_app
(
args
)
app
=
build_app
(
args
)
logger
.
info
(
"vLLM API server version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
if
args
.
served_model_name
is
not
None
:
if
args
.
served_model_name
is
not
None
:
served_model_names
=
args
.
served_model_name
served_model_names
=
args
.
served_model_name
else
:
else
:
...
@@ -231,19 +233,7 @@ def run_server(args, llm_engine=None):
...
@@ -231,19 +233,7 @@ def run_server(args, llm_engine=None):
if
llm_engine
is
not
None
else
AsyncLLMEngine
.
from_engine_args
(
if
llm_engine
is
not
None
else
AsyncLLMEngine
.
from_engine_args
(
engine_args
,
usage_context
=
UsageContext
.
OPENAI_API_SERVER
))
engine_args
,
usage_context
=
UsageContext
.
OPENAI_API_SERVER
))
event_loop
:
Optional
[
asyncio
.
AbstractEventLoop
]
model_config
=
await
engine
.
get_model_config
()
try
:
event_loop
=
asyncio
.
get_running_loop
()
except
RuntimeError
:
event_loop
=
None
if
event_loop
is
not
None
and
event_loop
.
is_running
():
# If the current is instanced by Ray Serve,
# there is already a running event loop
model_config
=
event_loop
.
run_until_complete
(
engine
.
get_model_config
())
else
:
# When using single vLLM without engine_use_ray
model_config
=
asyncio
.
run
(
engine
.
get_model_config
())
if
args
.
disable_log_requests
:
if
args
.
disable_log_requests
:
request_logger
=
None
request_logger
=
None
...
@@ -296,15 +286,48 @@ def run_server(args, llm_engine=None):
...
@@ -296,15 +286,48 @@ def run_server(args, llm_engine=None):
methods
=
', '
.
join
(
route
.
methods
)
methods
=
', '
.
join
(
route
.
methods
)
logger
.
info
(
"Route: %s, Methods: %s"
,
route
.
path
,
methods
)
logger
.
info
(
"Route: %s, Methods: %s"
,
route
.
path
,
methods
)
uvicorn
.
run
(
app
,
config
=
uvicorn
.
Config
(
host
=
args
.
host
,
app
,
port
=
args
.
port
,
host
=
args
.
host
,
log_level
=
args
.
uvicorn_log_level
,
port
=
args
.
port
,
timeout_keep_alive
=
TIMEOUT_KEEP_ALIVE
,
log_level
=
args
.
uvicorn_log_level
,
ssl_keyfile
=
args
.
ssl_keyfile
,
timeout_keep_alive
=
TIMEOUT_KEEP_ALIVE
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_keyfile
=
args
.
ssl_keyfile
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_cert_reqs
=
args
.
ssl_cert_reqs
)
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_cert_reqs
=
args
.
ssl_cert_reqs
,
**
uvicorn_kwargs
,
)
return
uvicorn
.
Server
(
config
)
async
def
run_server
(
args
,
llm_engine
=
None
,
**
uvicorn_kwargs
)
->
None
:
logger
.
info
(
"vLLM API server version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
server
=
await
build_server
(
args
,
llm_engine
,
**
uvicorn_kwargs
,
)
loop
=
asyncio
.
get_running_loop
()
server_task
=
loop
.
create_task
(
server
.
serve
())
def
signal_handler
()
->
None
:
# prevents the uvicorn signal handler to exit early
server_task
.
cancel
()
loop
.
add_signal_handler
(
signal
.
SIGINT
,
signal_handler
)
loop
.
add_signal_handler
(
signal
.
SIGTERM
,
signal_handler
)
try
:
await
server_task
except
asyncio
.
CancelledError
:
print
(
"Gracefully stopping http server"
)
await
server
.
shutdown
()
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -314,4 +337,4 @@ if __name__ == "__main__":
...
@@ -314,4 +337,4 @@ if __name__ == "__main__":
description
=
"vLLM OpenAI-Compatible RESTful API server."
)
description
=
"vLLM OpenAI-Compatible RESTful API server."
)
parser
=
make_arg_parser
(
parser
)
parser
=
make_arg_parser
(
parser
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
run_server
(
args
)
asyncio
.
run
(
run_server
(
args
)
)
vllm/scripts.py
View file @
ee812580
# The CLI entrypoint to vLLM.
# The CLI entrypoint to vLLM.
import
argparse
import
argparse
import
asyncio
import
os
import
os
import
signal
import
signal
import
sys
import
sys
...
@@ -25,7 +26,7 @@ def serve(args: argparse.Namespace) -> None:
...
@@ -25,7 +26,7 @@ def serve(args: argparse.Namespace) -> None:
# EngineArgs expects the model name to be passed as --model.
# EngineArgs expects the model name to be passed as --model.
args
.
model
=
args
.
model_tag
args
.
model
=
args
.
model_tag
run_server
(
args
)
asyncio
.
run
(
run_server
(
args
)
)
def
interactive_cli
(
args
:
argparse
.
Namespace
)
->
None
:
def
interactive_cli
(
args
:
argparse
.
Namespace
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment