Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2c7f01bc
Unverified
Commit
2c7f01bc
authored
Aug 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Aug 10, 2025
Browse files
Reorganize CI and test files (#9027)
parent
b58ae7a2
Changes
66
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
46 additions
and
44 deletions
+46
-44
test/srt/rl/test_update_weights_from_tensor.py
test/srt/rl/test_update_weights_from_tensor.py
+0
-0
test/srt/rl/test_verl_engine_2_gpu.py
test/srt/rl/test_verl_engine_2_gpu.py
+0
-0
test/srt/rl/test_verl_engine_4_gpu.py
test/srt/rl/test_verl_engine_4_gpu.py
+0
-0
test/srt/run_suite.py
test/srt/run_suite.py
+41
-42
test/srt/test_bench_serving.py
test/srt/test_bench_serving.py
+4
-1
test/srt/test_intel_amx_attention_backend.py
test/srt/test_intel_amx_attention_backend.py
+1
-1
No files found.
test/srt/test_update_weights_from_tensor.py
→
test/srt/
rl/
test_update_weights_from_tensor.py
View file @
2c7f01bc
File moved
test/srt/test_verl_engine_2_gpu.py
→
test/srt/
rl/
test_verl_engine_2_gpu.py
View file @
2c7f01bc
File moved
test/srt/test_verl_engine_4_gpu.py
→
test/srt/
rl/
test_verl_engine_4_gpu.py
View file @
2c7f01bc
File moved
test/srt/run_suite.py
View file @
2c7f01bc
...
@@ -13,13 +13,16 @@ class TestFile:
...
@@ -13,13 +13,16 @@ class TestFile:
suites
=
{
suites
=
{
"per-commit"
:
[
"per-commit"
:
[
TestFile
(
"models/lora/test_lora.py"
,
200
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"models/lora/test_lora_eviction.py"
,
200
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"models/lora/test_lora_update.py"
,
800
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_update.py"
,
800
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
# TestFile("models/test_clip_models.py", 52),
# TestFile("models/test_clip_models.py", 52),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
...
@@ -50,8 +53,13 @@ suites = {
...
@@ -50,8 +53,13 @@ suites = {
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_fp8_kernel.py"
,
8
),
TestFile
(
"quant/test_int8_kernel.py"
,
8
),
TestFile
(
"quant/test_w8a8_quantization.py"
,
46
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
...
@@ -60,15 +68,11 @@ suites = {
...
@@ -60,15 +68,11 @@ suites = {
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_fa3.py"
,
376
),
TestFile
(
"test_fa3.py"
,
376
),
# TestFile("test_flashmla.py", 352),
# TestFile("test_flashmla.py", 352),
TestFile
(
"test_fp8_kernel.py"
,
8
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_hicache.py"
,
116
),
TestFile
(
"test_hicache_mla.py"
,
127
),
TestFile
(
"test_hicache_storage.py"
,
127
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_
int8_kernel
.py"
,
8
),
TestFile
(
"test_
hybrid_attn_backend
.py"
,
100
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
...
@@ -85,6 +89,7 @@ suites = {
...
@@ -85,6 +89,7 @@ suites = {
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_server_args.py"
,
1
),
...
@@ -100,23 +105,18 @@ suites = {
...
@@ -100,23 +105,18 @@ suites = {
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
989
),
TestFile
(
"test_vision_openai_server_a.py"
,
989
),
TestFile
(
"test_vision_openai_server_b.py"
,
620
),
TestFile
(
"test_vision_openai_server_b.py"
,
620
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
],
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_dp_attention.py"
,
277
),
TestFile
(
"test_dp_attention.py"
,
277
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_release_memory_occupation.py"
,
127
),
TestFile
(
"test_release_memory_occupation.py"
,
127
),
],
],
"per-commit-4-gpu"
:
[
"per-commit-4-gpu"
:
[
...
@@ -127,7 +127,7 @@ suites = {
...
@@ -127,7 +127,7 @@ suites = {
],
],
"per-commit-8-gpu"
:
[
"per-commit-8-gpu"
:
[
# Disabled because it hangs on the CI.
# Disabled because it hangs on the CI.
# TestFile("test_moe_ep.py", 181),
# TestFile("
ep/
test_moe_ep.py", 181),
TestFile
(
"test_disaggregation.py"
,
499
),
TestFile
(
"test_disaggregation.py"
,
499
),
TestFile
(
"test_disaggregation_different_tp.py"
,
155
),
TestFile
(
"test_disaggregation_different_tp.py"
,
155
),
TestFile
(
"test_full_deepseek_v3.py"
,
333
),
TestFile
(
"test_full_deepseek_v3.py"
,
333
),
...
@@ -136,16 +136,16 @@ suites = {
...
@@ -136,16 +136,16 @@ suites = {
# add more here
# add more here
],
],
"per-commit-4-gpu-deepep"
:
[
"per-commit-4-gpu-deepep"
:
[
TestFile
(
"test_deepep_small.py"
,
531
),
TestFile
(
"
ep/
test_deepep_small.py"
,
531
),
],
],
"per-commit-8-gpu-deepep"
:
[
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"test_deepep_large.py"
,
338
),
TestFile
(
"
ep/
test_deepep_large.py"
,
338
),
],
],
"nightly"
:
[
"nightly"
:
[
TestFile
(
"test_nightly_gsm8k_eval.py"
),
TestFile
(
"test_nightly_gsm8k_eval.py"
),
],
],
"vllm_dependency_test"
:
[
"vllm_dependency_test"
:
[
TestFile
(
"test_awq.py"
,
163
),
TestFile
(
"
quant/
test_awq.py"
,
163
),
TestFile
(
"test_bnb.py"
,
5
),
TestFile
(
"test_bnb.py"
,
5
),
TestFile
(
"test_gguf.py"
,
96
),
TestFile
(
"test_gguf.py"
,
96
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
102
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
102
),
...
@@ -156,13 +156,9 @@ suites = {
...
@@ -156,13 +156,9 @@ suites = {
# Add AMD tests
# Add AMD tests
suite_amd
=
{
suite_amd
=
{
"per-commit-amd"
:
[
"per-commit-amd"
:
[
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
...
@@ -170,14 +166,18 @@ suite_amd = {
...
@@ -170,14 +166,18 @@ suite_amd = {
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
...
@@ -186,22 +186,21 @@ suite_amd = {
...
@@ -186,22 +186,21 @@ suite_amd = {
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_vertex_endpoint.py"
,
31
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_awq_dequant.py"
,
2
),
],
],
"per-commit-2-gpu-amd"
:
[
"per-commit-2-gpu-amd"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_update_weights_from_distributed.py"
,
103
),
],
],
"per-commit-4-gpu-amd"
:
[
"per-commit-4-gpu-amd"
:
[
TestFile
(
"test_pp_single_node.py"
,
150
),
TestFile
(
"test_pp_single_node.py"
,
150
),
...
@@ -236,13 +235,13 @@ suite_xeon = {
...
@@ -236,13 +235,13 @@ suite_xeon = {
# Add Ascend NPU tests
# Add Ascend NPU tests
suite_ascend
=
{
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
"per-commit-1-ascend-npu"
:
[
TestFile
(
"test_ascend_tp1_bf16.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_tp1_bf16.py"
,
400
),
],
],
"per-commit-2-ascend-npu"
:
[
"per-commit-2-ascend-npu"
:
[
TestFile
(
"test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_tp2_bf16.py"
,
400
),
],
],
"per-commit-4-ascend-npu"
:
[
"per-commit-4-ascend-npu"
:
[
TestFile
(
"test_ascend_mla_w8a8int8.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_mla_w8a8int8.py"
,
400
),
],
],
}
}
...
...
test/srt/test_bench_serving.py
View file @
2c7f01bc
...
@@ -56,7 +56,10 @@ class TestBenchServing(CustomTestCase):
...
@@ -56,7 +56,10 @@ class TestBenchServing(CustomTestCase):
f
"### test_offline_throughput_non_stream_small_batch_size
\n
"
f
"### test_offline_throughput_non_stream_small_batch_size
\n
"
f
"Output throughput:
{
res
[
'output_throughput'
]:.
2
f
}
token/s
\n
"
f
"Output throughput:
{
res
[
'output_throughput'
]:.
2
f
}
token/s
\n
"
)
)
self
.
assertGreater
(
res
[
"output_throughput"
],
1045
)
if
is_in_amd_ci
():
self
.
assertGreater
(
res
[
"output_throughput"
],
1000
)
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
1050
)
def
test_offline_throughput_without_radix_cache
(
self
):
def
test_offline_throughput_without_radix_cache
(
self
):
res
=
run_bench_serving
(
res
=
run_bench_serving
(
...
...
test/srt/test_intel_amx_attention_backend.py
View file @
2c7f01bc
...
@@ -70,7 +70,7 @@ class TestIntelAMXAttnBackend(CustomTestCase):
...
@@ -70,7 +70,7 @@ class TestIntelAMXAttnBackend(CustomTestCase):
)
)
metrics
=
run_eval
(
args
)
metrics
=
run_eval
(
args
)
self
.
assertGreater
(
metrics
[
"score"
],
0.5
)
self
.
assertGreater
(
metrics
[
"score"
],
0.
4
5
)
finally
:
finally
:
kill_process_tree
(
process
.
pid
)
kill_process_tree
(
process
.
pid
)
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment