Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
311d8756
Unverified
Commit
311d8756
authored
Aug 08, 2025
by
Russell Bryant
Committed by
GitHub
Aug 08, 2025
Browse files
Drop flaky test_healthcheck_response_time (#22539)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
parent
e3edc0a7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 0 additions and 54 deletions
+0
-54
tests/entrypoints/openai/test_async_tokenization.py
tests/entrypoints/openai/test_async_tokenization.py
+0
-54
No files found.
tests/entrypoints/openai/test_async_tokenization.py
View file @
311d8756
...
...
@@ -2,15 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
contextlib
import
random
import
time
from
typing
import
Callable
import
openai
import
pytest
import
pytest_asyncio
import
requests
from
tests.utils
import
RemoteOpenAIServer
...
...
@@ -87,54 +84,3 @@ async def test_with_and_without_truncate(
responses
=
await
asyncio
.
gather
(
*
[
get_status_code
(
**
b
)
for
b
in
bodies
])
assert
500
not
in
responses
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ids=["single completion", "multiple completions", "chat"],
    argnames=["create_func_gen", "content_body"],
    argvalues=[
        (lambda x: x.completions.create, {
            "prompt": " ".join(['A'] * 300_000)
        }),
        (lambda x: x.completions.create, {
            "prompt": [" ".join(['A'] * 300_000)] * 2
        }),
        (lambda x: x.chat.completions.create, {
            "messages": [{
                "role": "user",
                "content": " ".join(['A'] * 300_000)
            }]
        }),
    ],
)
async def test_healthcheck_response_time(
    server: RemoteOpenAIServer,
    client: openai.AsyncOpenAI,
    create_func_gen: Callable,
    content_body: dict,
):
    """Compare the health URL's response time with and without request load.

    The health URL (``server.url_for("health")``) is probed once before any
    requests are issued, and once more about a second after ``num_requests``
    concurrent requests with very long inputs have been scheduled.

    Args:
        server: Provides ``url_for("health")`` for the probe.
        client: Async OpenAI client that issues the load requests.
        create_func_gen: Maps ``client`` to the ``create`` coroutine function
            to call (completions or chat completions).
        content_body: Request fields merged into the body next to ``model``
            and ``max_tokens``.

    Raises:
        AssertionError: If a probe does not return HTTP 200, or the probe
            under load takes 0.1 s or more, or is 100x (or more) slower than
            the probe without load.
        requests.exceptions.Timeout: If a probe does not answer within the
            probe timeout.
    """
    num_requests = 50
    # Upper bound for a single probe so a stuck server fails the test
    # instead of hanging it; far above the 0.1 s asserted below.
    probe_timeout_s = 30

    create_func = create_func_gen(client)
    body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}
    health_url = server.url_for("health")

    def get_response_time(url):
        """Return the seconds one GET of ``url`` takes; require HTTP 200."""
        start_time = time.monotonic()
        res = requests.get(url, timeout=probe_timeout_s)
        end_time = time.monotonic()
        assert res.status_code == 200
        return end_time - start_time

    no_load_response_time = get_response_time(health_url)

    tasks = [
        asyncio.create_task(create_func(**body)) for _ in range(num_requests)
    ]
    try:
        await asyncio.sleep(1)  # give the tasks a chance to start running
        load_response_time = get_response_time(health_url)
    finally:
        # Always await every task, even when the probe above failed, so no
        # request is left running (or its exception unretrieved) after the
        # test returns.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    # API status errors from the load requests are tolerated; anything else
    # that a request raised is a real failure and is re-raised.
    for outcome in outcomes:
        if (isinstance(outcome, BaseException)
                and not isinstance(outcome, openai.APIStatusError)):
            raise outcome

    assert load_response_time < 100 * no_load_response_time, (
        f"health probe under load took {load_response_time:.4f}s, "
        f"no-load probe took {no_load_response_time:.4f}s")
    assert load_response_time < 0.1, (
        f"health probe under load took {load_response_time:.4f}s")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment