Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
311d8756
Unverified
Commit
311d8756
authored
Aug 08, 2025
by
Russell Bryant
Committed by
GitHub
Aug 08, 2025
Browse files
Drop flaky test_healthcheck_response_time (#22539)
Signed-off-by:
Russell Bryant
<
rbryant@redhat.com
>
parent
e3edc0a7
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
54 deletions
+0
-54
tests/entrypoints/openai/test_async_tokenization.py
tests/entrypoints/openai/test_async_tokenization.py
+0
-54
No files found.
tests/entrypoints/openai/test_async_tokenization.py
View file @
311d8756
...
@@ -2,15 +2,12 @@
...
@@ -2,15 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
asyncio
import
contextlib
import
random
import
random
import
time
from
typing
import
Callable
from
typing
import
Callable
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
pytest_asyncio
import
requests
from
tests.utils
import
RemoteOpenAIServer
from
tests.utils
import
RemoteOpenAIServer
...
@@ -87,54 +84,3 @@ async def test_with_and_without_truncate(
...
@@ -87,54 +84,3 @@ async def test_with_and_without_truncate(
responses
=
await
asyncio
.
gather
(
*
[
get_status_code
(
**
b
)
for
b
in
bodies
])
responses
=
await
asyncio
.
gather
(
*
[
get_status_code
(
**
b
)
for
b
in
bodies
])
assert
500
not
in
responses
assert
500
not
in
responses
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ids=["single completion", "multiple completions", "chat"],
    argnames=["create_func_gen", "content_body"],
    argvalues=[
        (lambda x: x.completions.create, {
            "prompt": " ".join(['A'] * 300_000)
        }),
        (lambda x: x.completions.create, {
            "prompt": [" ".join(['A'] * 300_000)] * 2
        }),
        (lambda x: x.chat.completions.create, {
            "messages": [{
                "role": "user",
                "content": " ".join(['A'] * 300_000)
            }]
        }),
    ],
)
async def test_healthcheck_response_time(
    server: RemoteOpenAIServer,
    client: openai.AsyncOpenAI,
    create_func_gen: Callable,
    content_body: dict,
):
    """Check that the health endpoint stays fast while requests are in flight.

    The health endpoint is timed once before any request is issued and once
    after ``num_requests`` concurrent requests with very long prompts have
    been started. The loaded response time must be below 100x the unloaded
    one and below 0.1 seconds.

    Args:
        server: Running server fixture; only ``url_for("health")`` is used.
        client: Async OpenAI client from which the create function is taken.
        create_func_gen: Maps the client to the coroutine function under
            test (completions or chat completions ``create``).
        content_body: Request fields specific to the parametrized case
            (``prompt`` or ``messages``).
    """
    num_requests = 50
    # Upper bound for a single health probe so that an unresponsive server
    # fails the test instead of hanging it forever.
    health_timeout_s = 30

    create_func = create_func_gen(client)
    body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}

    def get_response_time(url):
        """Return the wall-clock seconds one successful GET of *url* takes."""
        # NOTE(review): requests.get is synchronous, so the event loop (and
        # therefore the client tasks) makes no progress while it runs.
        start_time = time.monotonic()
        res = requests.get(url, timeout=health_timeout_s)
        end_time = time.monotonic()
        assert res.status_code == 200
        return end_time - start_time

    no_load_response_time = get_response_time(server.url_for("health"))
    tasks = [
        asyncio.create_task(create_func(**body)) for _ in range(num_requests)
    ]
    try:
        await asyncio.sleep(1)  # give the tasks a chance to start running
        load_response_time = get_response_time(server.url_for("health"))
    finally:
        # Always wait for every request, even when the probe above failed.
        # return_exceptions=True keeps gather from returning on the first
        # error and leaving the remaining tasks running after the test ends.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    # API status errors are tolerated for these requests; anything else is
    # a genuine failure and must surface.
    for outcome in outcomes:
        if (isinstance(outcome, BaseException)
                and not isinstance(outcome, openai.APIStatusError)):
            raise outcome

    assert load_response_time < 100 * no_load_response_time
    assert load_response_time < 0.1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment