Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0d7db16a
Unverified
Commit
0d7db16a
authored
Aug 04, 2025
by
Abirdcfly
Committed by
GitHub
Aug 03, 2025
Browse files
[PD] add test for chat completions endpoint (#21925)
Signed-off-by:
Abirdcfly
<
fp544037857@gmail.com
>
parent
845420ac
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
14 deletions
+29
-14
tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py
.../v1/kv_connector/nixl_integration/test_disagg_accuracy.py
+27
-14
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
+2
-0
No files found.
tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py
View file @
0d7db16a
...
...
@@ -51,19 +51,30 @@ def check_vllm_server(url: str, timeout=5, retries=3) -> bool:
return
False
def
run_simple_prompt
(
base_url
:
str
,
model_name
:
str
,
input_prompt
:
str
)
->
str
:
def run_simple_prompt(base_url: str, model_name: str, input_prompt: str,
                      use_chat_endpoint: bool) -> str:
    """Send a single prompt to an OpenAI-compatible server and return its reply.

    Args:
        base_url: Base URL handed to the ``openai.OpenAI`` client.
        model_name: Value passed as the ``model`` field of the request.
        input_prompt: Prompt text. Sent as the raw ``prompt`` for the
            completions endpoint, or wrapped in a single user message with
            one text part for the chat completions endpoint.
        use_chat_endpoint: When true, call ``chat.completions.create``;
            otherwise call ``completions.create``.

    Returns:
        The first choice's ``message.content`` (chat endpoint) or ``text``
        (completions endpoint).
    """
    client = openai.OpenAI(api_key="EMPTY", base_url=base_url)

    # Both endpoints are called with temperature 0.0 and the same fixed seed
    # (presumably so outputs are reproducible across runs).
    sampling = {"temperature": 0.0, "seed": 42}

    if not use_chat_endpoint:
        response = client.completions.create(model=model_name,
                                             prompt=input_prompt,
                                             max_tokens=MAX_OUTPUT_LEN,
                                             **sampling)
        return response.choices[0].text

    user_message = {
        "role": "user",
        "content": [{
            "type": "text",
            "text": input_prompt
        }],
    }
    # The chat request caps output via ``max_completion_tokens`` rather than
    # ``max_tokens``.
    response = client.chat.completions.create(
        model=model_name,
        messages=[user_message],
        max_completion_tokens=MAX_OUTPUT_LEN,
        **sampling)
    return response.choices[0].message.content
...
...
@@ -125,10 +136,12 @@ def main():
f
"vllm server:
{
args
.
service_url
}
is not ready yet!"
)
output_strs
=
dict
()
for
prompt
in
SAMPLE_PROMPTS
:
for
i
,
prompt
in
enumerate
(
SAMPLE_PROMPTS
):
use_chat_endpoint
=
(
i
%
2
==
1
)
output_str
=
run_simple_prompt
(
base_url
=
service_url
,
model_name
=
args
.
model_name
,
input_prompt
=
prompt
)
input_prompt
=
prompt
,
use_chat_endpoint
=
use_chat_endpoint
)
print
(
f
"Prompt:
{
prompt
}
, output:
{
output_str
}
"
)
output_strs
[
prompt
]
=
output_str
...
...
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
View file @
0d7db16a
...
...
@@ -162,6 +162,8 @@ async def send_request_to_service(client_info: dict, endpoint: str,
}
req_data
[
"stream"
]
=
False
req_data
[
"max_tokens"
]
=
1
if
"max_completion_tokens"
in
req_data
:
req_data
[
"max_completion_tokens"
]
=
1
if
"stream_options"
in
req_data
:
del
req_data
[
"stream_options"
]
headers
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment