Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0bcc3a16
Unverified
Commit
0bcc3a16
authored
Sep 25, 2025
by
Cyrus Leung
Committed by
GitHub
Sep 25, 2025
Browse files
[CI/Build] Fix flaky entrypoints test (#25663)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
70fbdb26
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
23 deletions
+41
-23
tests/entrypoints/openai/test_completion_with_prompt_embeds.py
.../entrypoints/openai/test_completion_with_prompt_embeds.py
+41
-23
No files found.
tests/entrypoints/openai/test_completion_with_prompt_embeds.py
View file @
0bcc3a16
...
...
@@ -15,7 +15,7 @@ from transformers import AutoConfig
from
...utils
import
RemoteOpenAIServer
# any model with a chat template should work here
MODEL_NAME
=
"
HuggingFaceH4/zephyr-7b-beta
"
MODEL_NAME
=
"
facebook/opt-125m
"
CONFIG
=
AutoConfig
.
from_pretrained
(
MODEL_NAME
)
...
...
@@ -27,7 +27,7 @@ def default_server_args() -> list[str]:
"--dtype"
,
"bfloat16"
,
"--max-model-len"
,
"
8192
"
,
"
2048
"
,
"--max-num-seqs"
,
"128"
,
"--enforce-eager"
,
...
...
@@ -36,6 +36,27 @@ def default_server_args() -> list[str]:
]
EXAMPLE_PROMPTS
=
[
"Hello, my name is"
,
"What is an LLM?"
,
]
def
_encode_embeds
(
embeds
:
torch
.
Tensor
):
buffer
=
io
.
BytesIO
()
torch
.
save
(
embeds
,
buffer
)
return
base64
.
b64encode
(
buffer
.
getvalue
()).
decode
(
'utf-8'
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
example_prompt_embeds
(
hf_runner
):
"""Create example embeddings and return them as base64 encoded string."""
with
hf_runner
(
MODEL_NAME
)
as
hf_model
:
example_embeddings
=
hf_model
.
get_prompt_embeddings
(
EXAMPLE_PROMPTS
)
return
[
_encode_embeds
(
item
)
for
item
in
example_embeddings
]
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
""
,
"--disable-frontend-multiprocessing"
])
def
server_with_prompt_embeds
(
default_server_args
,
request
):
...
...
@@ -52,21 +73,16 @@ async def client_with_prompt_embeds(server_with_prompt_embeds):
yield
async_client
def
create_dummy_embeds
(
num_tokens
:
int
=
5
)
->
str
:
"""Create dummy embeddings and return them as base64 encoded string."""
dummy_embeds
=
torch
.
randn
(
num_tokens
,
CONFIG
.
hidden_size
)
buffer
=
io
.
BytesIO
()
torch
.
save
(
dummy_embeds
,
buffer
)
return
base64
.
b64encode
(
buffer
.
getvalue
()).
decode
(
'utf-8'
)
@
pytest
.
mark
.
skip
(
"This test is skipped because it is flaky."
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
async
def
test_completions_with_prompt_embeds
(
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
example_prompt_embeds
,
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
):
encoded_embeds
,
encoded_embeds2
=
example_prompt_embeds
# Test case: Single prompt embeds input
encoded_embeds
=
create_dummy_embeds
()
completion
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -77,7 +93,6 @@ async def test_completions_with_prompt_embeds(
assert
completion
.
choices
[
0
].
prompt_logprobs
is
None
# Test case: batch completion with prompt_embeds
encoded_embeds2
=
create_dummy_embeds
()
completion
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -89,7 +104,6 @@ async def test_completions_with_prompt_embeds(
assert
len
(
completion
.
choices
[
1
].
text
)
>=
1
# Test case: streaming with prompt_embeds
encoded_embeds
=
create_dummy_embeds
()
single_completion
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -117,7 +131,6 @@ async def test_completions_with_prompt_embeds(
assert
""
.
join
(
chunks
)
==
single_output
# Test case: batch streaming with prompt_embeds
encoded_embeds2
=
create_dummy_embeds
()
stream
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -139,7 +152,6 @@ async def test_completions_with_prompt_embeds(
assert
len
(
chunks_stream_embeds
[
1
])
>
0
# Test case: mixed text and prompt_embeds
encoded_embeds
=
create_dummy_embeds
()
completion_mixed
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
"This is a prompt"
,
...
...
@@ -184,10 +196,14 @@ async def test_completions_errors_with_prompt_embeds(
@
pytest
.
mark
.
parametrize
(
"logprobs_arg"
,
[
1
,
0
])
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
async
def
test_completions_with_logprobs_and_prompt_embeds
(
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
,
logprobs_arg
:
int
,
model_name
:
str
):
example_prompt_embeds
,
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
,
logprobs_arg
:
int
,
model_name
:
str
,
):
encoded_embeds
,
encoded_embeds2
=
example_prompt_embeds
# Test case: Logprobs using prompt_embeds
encoded_embeds
=
create_dummy_embeds
()
completion
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -207,7 +223,6 @@ async def test_completions_with_logprobs_and_prompt_embeds(
assert
len
(
logprobs
.
tokens
)
==
5
# Test case: Log probs with batch completion and prompt_embeds
encoded_embeds2
=
create_dummy_embeds
()
completion
=
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
model_name
,
prompt
=
""
,
# Add empty prompt as required parameter
...
...
@@ -232,9 +247,12 @@ async def test_completions_with_logprobs_and_prompt_embeds(
@
pytest
.
mark
.
asyncio
async
def
test_prompt_logprobs_raises_error
(
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
):
example_prompt_embeds
,
client_with_prompt_embeds
:
openai
.
AsyncOpenAI
,
):
encoded_embeds
,
_
=
example_prompt_embeds
with
pytest
.
raises
(
BadRequestError
,
match
=
"not compatible"
):
encoded_embeds
=
create_dummy_embeds
()
await
client_with_prompt_embeds
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
""
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment