Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9a94ca4a
Unverified
Commit
9a94ca4a
authored
Oct 08, 2024
by
Daniele
Committed by
GitHub
Oct 08, 2024
Browse files
[Bugfix] fix OpenAI API server startup with --disable-frontend-multiprocessing (#8537)
parent
cfba685b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
63 additions
and
5 deletions
+63
-5
tests/entrypoints/openai/test_basic.py
tests/entrypoints/openai/test_basic.py
+57
-1
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+6
-4
No files found.
tests/entrypoints/openai/test_basic.py
View file @
9a94ca4a
from
http
import
HTTPStatus
from
http
import
HTTPStatus
from
typing
import
List
import
openai
import
openai
import
pytest
import
pytest
...
@@ -12,8 +13,44 @@ from ...utils import RemoteOpenAIServer
...
@@ -12,8 +13,44 @@ from ...utils import RemoteOpenAIServer
MODEL_NAME
=
"HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME
=
"HuggingFaceH4/zephyr-7b-beta"
@
pytest
.
fixture
(
scope
=
'module'
)
def
server_args
(
request
:
pytest
.
FixtureRequest
)
->
List
[
str
]:
""" Provide extra arguments to the server via indirect parametrization
Usage:
>>> @pytest.mark.parametrize(
>>> "server_args",
>>> [
>>> ["--disable-frontend-multiprocessing"],
>>> [
>>> "--model=NousResearch/Hermes-3-Llama-3.1-70B",
>>> "--enable-auto-tool-choice",
>>> ],
>>> ],
>>> indirect=True,
>>> )
>>> def test_foo(server, client):
>>> ...
This will run `test_foo` twice with servers with:
- `--disable-frontend-multiprocessing`
- `--model=NousResearch/Hermes-3-Llama-3.1-70B --enable-auto-tool-choice`.
"""
if
not
hasattr
(
request
,
"param"
):
return
[]
val
=
request
.
param
if
isinstance
(
val
,
str
):
return
[
val
]
return
request
.
param
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
():
def
server
(
server_args
):
args
=
[
args
=
[
# use half precision for speed and memory savings in CI environment
# use half precision for speed and memory savings in CI environment
"--dtype"
,
"--dtype"
,
...
@@ -23,6 +60,7 @@ def server():
...
@@ -23,6 +60,7 @@ def server():
"--enforce-eager"
,
"--enforce-eager"
,
"--max-num-seqs"
,
"--max-num-seqs"
,
"128"
,
"128"
,
*
server_args
,
]
]
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
...
@@ -35,6 +73,15 @@ async def client(server):
...
@@ -35,6 +73,15 @@ async def client(server):
yield
async_client
yield
async_client
@
pytest
.
mark
.
parametrize
(
"server_args"
,
[
pytest
.
param
([],
id
=
"default-frontend-multiprocessing"
),
pytest
.
param
([
"--disable-frontend-multiprocessing"
],
id
=
"disable-frontend-multiprocessing"
)
],
indirect
=
True
,
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_show_version
(
client
:
openai
.
AsyncOpenAI
):
async
def
test_show_version
(
client
:
openai
.
AsyncOpenAI
):
base_url
=
str
(
client
.
base_url
)[:
-
3
].
strip
(
"/"
)
base_url
=
str
(
client
.
base_url
)[:
-
3
].
strip
(
"/"
)
...
@@ -45,6 +92,15 @@ async def test_show_version(client: openai.AsyncOpenAI):
...
@@ -45,6 +92,15 @@ async def test_show_version(client: openai.AsyncOpenAI):
assert
response
.
json
()
==
{
"version"
:
VLLM_VERSION
}
assert
response
.
json
()
==
{
"version"
:
VLLM_VERSION
}
@
pytest
.
mark
.
parametrize
(
"server_args"
,
[
pytest
.
param
([],
id
=
"default-frontend-multiprocessing"
),
pytest
.
param
([
"--disable-frontend-multiprocessing"
],
id
=
"disable-frontend-multiprocessing"
)
],
indirect
=
True
,
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_check_health
(
client
:
openai
.
AsyncOpenAI
):
async
def
test_check_health
(
client
:
openai
.
AsyncOpenAI
):
base_url
=
str
(
client
.
base_url
)[:
-
3
].
strip
(
"/"
)
base_url
=
str
(
client
.
base_url
)[:
-
3
].
strip
(
"/"
)
...
...
vllm/entrypoints/openai/api_server.py
View file @
9a94ca4a
...
@@ -537,8 +537,11 @@ async def run_server(args, **uvicorn_kwargs) -> None:
...
@@ -537,8 +537,11 @@ async def run_server(args, **uvicorn_kwargs) -> None:
raise
KeyError
(
f
"invalid tool call parser:
{
args
.
tool_call_parser
}
"
raise
KeyError
(
f
"invalid tool call parser:
{
args
.
tool_call_parser
}
"
f
"(chose from {{
{
','
.
join
(
valide_tool_parses
)
}
}})"
)
f
"(chose from {{
{
','
.
join
(
valide_tool_parses
)
}
}})"
)
temp_socket
=
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
# workaround to make sure that we bind the port before the engine is set up.
temp_socket
.
bind
((
""
,
args
.
port
))
# This avoids race conditions with ray.
# see https://github.com/vllm-project/vllm/issues/8204
sock
=
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
sock
.
bind
((
""
,
args
.
port
))
def
signal_handler
(
*
_
)
->
None
:
def
signal_handler
(
*
_
)
->
None
:
# Interrupt server on sigterm while initializing
# Interrupt server on sigterm while initializing
...
@@ -552,8 +555,6 @@ async def run_server(args, **uvicorn_kwargs) -> None:
...
@@ -552,8 +555,6 @@ async def run_server(args, **uvicorn_kwargs) -> None:
model_config
=
await
engine_client
.
get_model_config
()
model_config
=
await
engine_client
.
get_model_config
()
init_app_state
(
engine_client
,
model_config
,
app
.
state
,
args
)
init_app_state
(
engine_client
,
model_config
,
app
.
state
,
args
)
temp_socket
.
close
()
shutdown_task
=
await
serve_http
(
shutdown_task
=
await
serve_http
(
app
,
app
,
host
=
args
.
host
,
host
=
args
.
host
,
...
@@ -564,6 +565,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
...
@@ -564,6 +565,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
ssl_certfile
=
args
.
ssl_certfile
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_cert_reqs
=
args
.
ssl_cert_reqs
,
ssl_cert_reqs
=
args
.
ssl_cert_reqs
,
fd
=
sock
.
fileno
(),
**
uvicorn_kwargs
,
**
uvicorn_kwargs
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment