Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5bf35a91
"vscode:/vscode.git/clone" did not exist on "46d4359450cd194ab2a4f2fdc370ff4b33a188e2"
Unverified
Commit
5bf35a91
authored
Jul 17, 2024
by
Cyrus Leung
Committed by
GitHub
Jul 17, 2024
Browse files
[Doc][CI/Build] Update docs and tests to use `vllm serve` (#6431)
parent
a19e8d37
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
24 additions
and
20 deletions
+24
-20
tests/entrypoints/openai/test_vision.py
tests/entrypoints/openai/test_vision.py
+11
-11
tests/tensorizer_loader/test_tensorizer.py
tests/tensorizer_loader/test_tensorizer.py
+2
-2
tests/utils.py
tests/utils.py
+11
-7
No files found.
tests/entrypoints/openai/test_vision.py
View file @
5bf35a91
...
@@ -23,17 +23,17 @@ TEST_IMAGE_URLS = [
...
@@ -23,17 +23,17 @@ TEST_IMAGE_URLS = [
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
():
def
server
():
with
RemoteOpenAIServer
(
[
args
=
[
"--model
"
,
"--dtype
"
,
MODEL_NAME
,
"bfloat16"
,
"--dtype
"
,
"--max-model-len
"
,
"bfloat1
6"
,
"409
6"
,
"--max-model-len
"
,
"--enforce-eager
"
,
"4096
"
,
"--chat-template
"
,
"--enforce-eager"
,
str
(
LLAVA_CHAT_TEMPLATE
)
,
"--chat-template"
,
]
str
(
LLAVA_CHAT_TEMPLATE
),
]
)
as
remote_server
:
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
yield
remote_server
...
...
tests/tensorizer_loader/test_tensorizer.py
View file @
5bf35a91
...
@@ -214,12 +214,12 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
...
@@ -214,12 +214,12 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
## Start OpenAI API server
## Start OpenAI API server
openai_args
=
[
openai_args
=
[
"--model"
,
model_ref
,
"--dtype"
,
"float16"
,
"--load-format"
,
"--dtype"
,
"float16"
,
"--load-format"
,
"tensorizer"
,
"--model-loader-extra-config"
,
"tensorizer"
,
"--model-loader-extra-config"
,
json
.
dumps
(
model_loader_extra_config
),
json
.
dumps
(
model_loader_extra_config
),
]
]
with
RemoteOpenAIServer
(
openai_args
)
as
server
:
with
RemoteOpenAIServer
(
model_ref
,
openai_args
)
as
server
:
print
(
"Server ready."
)
print
(
"Server ready."
)
client
=
server
.
get_client
()
client
=
server
.
get_client
()
...
...
tests/utils.py
View file @
5bf35a91
...
@@ -49,7 +49,13 @@ class RemoteOpenAIServer:
...
@@ -49,7 +49,13 @@ class RemoteOpenAIServer:
DUMMY_API_KEY
=
"token-abc123"
# vLLM's OpenAI server does not need API key
DUMMY_API_KEY
=
"token-abc123"
# vLLM's OpenAI server does not need API key
MAX_SERVER_START_WAIT_S
=
600
# wait for server to start for 60 seconds
MAX_SERVER_START_WAIT_S
=
600
# wait for server to start for 60 seconds
def
__init__
(
self
,
cli_args
:
List
[
str
],
*
,
auto_port
:
bool
=
True
)
->
None
:
def
__init__
(
self
,
model
:
str
,
cli_args
:
List
[
str
],
*
,
auto_port
:
bool
=
True
,
)
->
None
:
if
auto_port
:
if
auto_port
:
if
"-p"
in
cli_args
or
"--port"
in
cli_args
:
if
"-p"
in
cli_args
or
"--port"
in
cli_args
:
raise
ValueError
(
"You have manually specified the port"
raise
ValueError
(
"You have manually specified the port"
...
@@ -68,12 +74,10 @@ class RemoteOpenAIServer:
...
@@ -68,12 +74,10 @@ class RemoteOpenAIServer:
# the current process might initialize cuda,
# the current process might initialize cuda,
# to be safe, we should use spawn method
# to be safe, we should use spawn method
env
[
'VLLM_WORKER_MULTIPROC_METHOD'
]
=
'spawn'
env
[
'VLLM_WORKER_MULTIPROC_METHOD'
]
=
'spawn'
self
.
proc
=
subprocess
.
Popen
(
self
.
proc
=
subprocess
.
Popen
([
"vllm"
,
"serve"
]
+
[
model
]
+
cli_args
,
[
sys
.
executable
,
"-m"
,
"vllm.entrypoints.openai.api_server"
]
+
env
=
env
,
cli_args
,
stdout
=
sys
.
stdout
,
env
=
env
,
stderr
=
sys
.
stderr
)
stdout
=
sys
.
stdout
,
stderr
=
sys
.
stderr
)
self
.
_wait_for_server
(
url
=
self
.
url_for
(
"health"
),
self
.
_wait_for_server
(
url
=
self
.
url_for
(
"health"
),
timeout
=
self
.
MAX_SERVER_START_WAIT_S
)
timeout
=
self
.
MAX_SERVER_START_WAIT_S
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment