sglang commit fa42e419 (Unverified)
Authored Jun 23, 2025 by Chang Su; committed by GitHub on Jun 23, 2025

ci: Revert openai_server related tests in AMD suites (#7449)
Parent: e5afb88b

Showing 2 changed files with 47 additions and 60 deletions:

    test/srt/openai_server/features/test_cache_report.py  (+43, -42)
    test/srt/run_suite.py                                  (+4, -18)
test/srt/openai_server/features/test_cache_report.py

@@ -163,48 +163,49 @@ class TestCacheReport(CustomTestCase):
             >= usage_2.prompt_tokens - self.min_cached
         )
 
-    # def test_cache_report_openai_async(self):
-    #     print("=" * 100)
-    #     async def run_test():
-    #         task0 = asyncio.create_task(
-    #             self.cache_report_openai_async(
-    #                 "first request, to start the inference and let the next two request be started in the same batch"
-    #             )
-    #         )
-    #         await asyncio.sleep(1)  # to force the first request to be started first
-    #         task1 = asyncio.create_task(
-    #             self.cache_report_openai_async(
-    #                 "> can the same batch parallel request use the cache?"
-    #             )
-    #         )
-    #         task2 = asyncio.create_task(
-    #             self.cache_report_openai_async(
-    #                 "> can the same batch parallel request use the cache?"
-    #             )
-    #         )
-    #         result0, result1, result2 = await asyncio.gather(task0, task1, task2)
-    #         cached_tokens0, prompt_tokens0 = result0
-    #         cached_tokens1, prompt_tokens1 = result1
-    #         cached_tokens2, prompt_tokens2 = result2
-    #         print(
-    #             f"Async request 0 - Cached tokens: {cached_tokens0}, Prompt tokens: {prompt_tokens0}"
-    #         )
-    #         print(
-    #             f"Async request 1 - Cached tokens: {cached_tokens1}, Prompt tokens: {prompt_tokens1}"
-    #         )
-    #         print(
-    #             f"Async request 2 - Cached tokens: {cached_tokens2}, Prompt tokens: {prompt_tokens2}"
-    #         )
-    #         # Assert that no requests used the cache (because first is alone, and the next two are in the same batch)
-    #         # If a new optimisation limiting starting request with same prefix at the same time was added
-    #         # to maximise the cache hit, this would not be true
-    #         assert cached_tokens1 == cached_tokens2 == cached_tokens0
-    # asyncio.run(run_test())
+    def test_cache_report_openai_async(self):
+        # TODO: flaky test
+        print("=" * 100)
+
+        async def run_test():
+            task0 = asyncio.create_task(
+                self.cache_report_openai_async(
+                    "first request, to start the inference and let the next two request be started in the same batch"
+                )
+            )
+            await asyncio.sleep(0.05)  # to force the first request to be started first
+            task1 = asyncio.create_task(
+                self.cache_report_openai_async(
+                    "> can the same batch parallel request use the cache?"
+                )
+            )
+            task2 = asyncio.create_task(
+                self.cache_report_openai_async(
+                    "> can the same batch parallel request use the cache?"
+                )
+            )
+            result0, result1, result2 = await asyncio.gather(task0, task1, task2)
+            cached_tokens0, prompt_tokens0 = result0
+            cached_tokens1, prompt_tokens1 = result1
+            cached_tokens2, prompt_tokens2 = result2
+            print(
+                f"Async request 0 - Cached tokens: {cached_tokens0}, Prompt tokens: {prompt_tokens0}"
+            )
+            print(
+                f"Async request 1 - Cached tokens: {cached_tokens1}, Prompt tokens: {prompt_tokens1}"
+            )
+            print(
+                f"Async request 2 - Cached tokens: {cached_tokens2}, Prompt tokens: {prompt_tokens2}"
+            )
+            # Assert that no requests used the cache (because first is alone, and the next two are in the same batch)
+            # If a new optimisation limiting starting request with same prefix at the same time was added
+            # to maximise the cache hit, this would not be true
+            assert cached_tokens1 == cached_tokens2 == cached_tokens0
+
+        asyncio.run(run_test())
 
 
 if __name__ == "__main__":
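For context on what the re-enabled test exercises: cache_report_openai_async (defined earlier in test_cache_report.py, outside this hunk) returns a (cached_tokens, prompt_tokens) pair for one chat completion. Below is a minimal sketch of what such a helper might look like, assuming the official openai Python client pointed at a local sglang server; the endpoint URL, model alias, and the usage.prompt_tokens_details.cached_tokens field are assumptions for illustration, not taken from the diff.

import openai  # assumes the official `openai` Python client is installed


# Hypothetical sketch: the real cache_report_openai_async is defined
# earlier in test_cache_report.py and is not shown in this hunk.
async def cache_report_openai_async(prompt: str) -> tuple[int, int]:
    client = openai.AsyncOpenAI(
        base_url="http://127.0.0.1:30000/v1",  # assumed local sglang endpoint
        api_key="dummy-key",  # placeholder; a local test server ignores it
    )
    response = await client.chat.completions.create(
        model="default",  # assumed model alias
        messages=[{"role": "user", "content": prompt}],
        max_tokens=8,
    )
    usage = response.usage
    # OpenAI-compatible servers report prefix-cache hits under
    # usage.prompt_tokens_details.cached_tokens; assumed present here.
    details = getattr(usage, "prompt_tokens_details", None)
    cached = 0
    if details is not None and details.cached_tokens is not None:
        cached = details.cached_tokens
    return cached, usage.prompt_tokens

The test then fires one request alone, sleeps briefly so it starts first, and launches two identical requests that land in the same batch; per the comments in the diff, all three should report equal cached-token counts, since the first runs alone and the other two are batched together before either can populate the cache.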
test/srt/run_suite.py

@@ -113,11 +113,10 @@ suites = {
         TestFile("models/test_qwen_models.py", 82),
         TestFile("models/test_reward_models.py", 132),
         TestFile("openai_server/basic/test_openai_embedding.py", 141),
-        TestFile("openai_server/basic/test_openai_server.py", 149),
-        TestFile("openai_server/basic/test_protocol.py", 10),
-        TestFile("openai_server/basic/test_serving_chat.py", 10),
-        TestFile("openai_server/basic/test_serving_completions.py", 10),
-        TestFile("openai_server/basic/test_serving_embedding.py", 10),
+        TestFile("openai_server/features/test_enable_thinking.py", 70),
+        TestFile("openai_server/features/test_reasoning_content.py", 89),
+        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
+        TestFile("openai_server/validation/test_request_length_validation.py", 31),
         TestFile("test_abort.py", 51),
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
@@ -125,19 +124,6 @@ suites = {
         TestFile("test_eval_fp8_accuracy.py", 303),
         TestFile("test_function_call_parser.py", 10),
         TestFile("test_input_embeddings.py", 38),
-        TestFile("openai_server/features/test_cache_report.py", 100),
-        TestFile("openai_server/features/test_enable_thinking.py", 70),
-        TestFile("openai_server/features/test_json_constrained.py", 98),
-        TestFile("openai_server/features/test_json_mode.py", 90),
-        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
-        TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
-        TestFile("openai_server/features/test_reasoning_content.py", 89),
-        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
-        TestFile("openai_server/function_call/test_tool_choice.py", 226),
-        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
-        TestFile("openai_server/validation/test_matched_stop.py", 60),
-        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
-        TestFile("openai_server/validation/test_request_length_validation.py", 31),
         TestFile("test_metrics.py", 32),
         TestFile("test_no_chunked_prefill.py", 108),
         TestFile("test_no_overlap_scheduler.py", 234),
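The second argument of each TestFile(...) entry is a per-file time estimate in seconds. Below is a minimal sketch of the structures these hunks edit, assuming a simple dataclass; the real TestFile and suites definitions at the top of run_suite.py may differ, and the suite key shown is hypothetical (the hunks edit an AMD suite list per the commit title).

from dataclasses import dataclass


# Hypothetical sketch: the real definitions sit at the top of
# test/srt/run_suite.py and may differ in detail.
@dataclass
class TestFile:
    name: str
    estimated_time: int = 60  # rough per-file runtime, in seconds


suites = {
    # Suite key is illustrative only.
    "per-commit-amd": [
        TestFile("models/test_qwen_models.py", 82),
        TestFile("openai_server/features/test_enable_thinking.py", 70),
    ],
}


def estimated_suite_time(suite: str) -> int:
    # Summing the estimates lets CI split a suite into balanced shards.
    return sum(t.estimated_time for t in suites[suite])

Read this way, the two hunks match the file's reported +4/-18: five openai_server entries deleted in the first hunk and thirteen in the second, with four of them re-added in sorted position.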