Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2c7f01bc
Unverified
Commit
2c7f01bc
authored
Aug 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Aug 10, 2025
Browse files
Reorganize CI and test files (#9027)
parent
b58ae7a2
Changes
66
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
46 additions
and
44 deletions
+46
-44
test/srt/rl/test_update_weights_from_tensor.py
test/srt/rl/test_update_weights_from_tensor.py
+0
-0
test/srt/rl/test_verl_engine_2_gpu.py
test/srt/rl/test_verl_engine_2_gpu.py
+0
-0
test/srt/rl/test_verl_engine_4_gpu.py
test/srt/rl/test_verl_engine_4_gpu.py
+0
-0
test/srt/run_suite.py
test/srt/run_suite.py
+41
-42
test/srt/test_bench_serving.py
test/srt/test_bench_serving.py
+4
-1
test/srt/test_intel_amx_attention_backend.py
test/srt/test_intel_amx_attention_backend.py
+1
-1
No files found.
test/srt/test_update_weights_from_tensor.py
→
test/srt/
rl/
test_update_weights_from_tensor.py
View file @
2c7f01bc
File moved
test/srt/test_verl_engine_2_gpu.py
→
test/srt/
rl/
test_verl_engine_2_gpu.py
View file @
2c7f01bc
File moved
test/srt/test_verl_engine_4_gpu.py
→
test/srt/
rl/
test_verl_engine_4_gpu.py
View file @
2c7f01bc
File moved
test/srt/run_suite.py
View file @
2c7f01bc
...
...
@@ -13,13 +13,16 @@ class TestFile:
suites
=
{
"per-commit"
:
[
TestFile
(
"models/lora/test_lora.py"
,
200
),
TestFile
(
"models/lora/test_lora_eviction.py"
,
200
),
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"models/lora/test_lora_update.py"
,
800
),
TestFile
(
"models/lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_update.py"
,
800
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
# TestFile("models/test_clip_models.py", 52),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
...
...
@@ -50,8 +53,13 @@ suites = {
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_fp8_kernel.py"
,
8
),
TestFile
(
"quant/test_int8_kernel.py"
,
8
),
TestFile
(
"quant/test_w8a8_quantization.py"
,
46
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
...
...
@@ -60,15 +68,11 @@ suites = {
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_fa3.py"
,
376
),
# TestFile("test_flashmla.py", 352),
TestFile
(
"test_fp8_kernel.py"
,
8
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_hicache.py"
,
116
),
TestFile
(
"test_hicache_mla.py"
,
127
),
TestFile
(
"test_hicache_storage.py"
,
127
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_
int8_kernel
.py"
,
8
),
TestFile
(
"test_
hybrid_attn_backend
.py"
,
100
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
...
...
@@ -85,6 +89,7 @@ suites = {
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_server_args.py"
,
1
),
...
...
@@ -100,23 +105,18 @@ suites = {
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
989
),
TestFile
(
"test_vision_openai_server_b.py"
,
620
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
],
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_dp_attention.py"
,
277
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_release_memory_occupation.py"
,
127
),
],
"per-commit-4-gpu"
:
[
...
...
@@ -127,7 +127,7 @@ suites = {
],
"per-commit-8-gpu"
:
[
# Disabled because it hangs on the CI.
# TestFile("test_moe_ep.py", 181),
# TestFile("
ep/
test_moe_ep.py", 181),
TestFile
(
"test_disaggregation.py"
,
499
),
TestFile
(
"test_disaggregation_different_tp.py"
,
155
),
TestFile
(
"test_full_deepseek_v3.py"
,
333
),
...
...
@@ -136,16 +136,16 @@ suites = {
# add more here
],
"per-commit-4-gpu-deepep"
:
[
TestFile
(
"test_deepep_small.py"
,
531
),
TestFile
(
"
ep/
test_deepep_small.py"
,
531
),
],
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"test_deepep_large.py"
,
338
),
TestFile
(
"
ep/
test_deepep_large.py"
,
338
),
],
"nightly"
:
[
TestFile
(
"test_nightly_gsm8k_eval.py"
),
],
"vllm_dependency_test"
:
[
TestFile
(
"test_awq.py"
,
163
),
TestFile
(
"
quant/
test_awq.py"
,
163
),
TestFile
(
"test_bnb.py"
,
5
),
TestFile
(
"test_gguf.py"
,
96
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
102
),
...
...
@@ -156,13 +156,9 @@ suites = {
# Add AMD tests
suite_amd
=
{
"per-commit-amd"
:
[
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
...
...
@@ -170,14 +166,18 @@ suite_amd = {
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
...
...
@@ -186,22 +186,21 @@ suite_amd = {
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_vertex_endpoint.py"
,
31
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_awq_dequant.py"
,
2
),
],
"per-commit-2-gpu-amd"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_update_weights_from_distributed.py"
,
103
),
],
"per-commit-4-gpu-amd"
:
[
TestFile
(
"test_pp_single_node.py"
,
150
),
...
...
@@ -236,13 +235,13 @@ suite_xeon = {
# Add Ascend NPU tests
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
TestFile
(
"test_ascend_tp1_bf16.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_tp1_bf16.py"
,
400
),
],
"per-commit-2-ascend-npu"
:
[
TestFile
(
"test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_tp2_bf16.py"
,
400
),
],
"per-commit-4-ascend-npu"
:
[
TestFile
(
"test_ascend_mla_w8a8int8.py"
,
400
),
TestFile
(
"
ascend/
test_ascend_mla_w8a8int8.py"
,
400
),
],
}
...
...
test/srt/test_bench_serving.py
View file @
2c7f01bc
...
...
@@ -56,7 +56,10 @@ class TestBenchServing(CustomTestCase):
f
"### test_offline_throughput_non_stream_small_batch_size
\n
"
f
"Output throughput:
{
res
[
'output_throughput'
]:.
2
f
}
token/s
\n
"
)
self
.
assertGreater
(
res
[
"output_throughput"
],
1045
)
if
is_in_amd_ci
():
self
.
assertGreater
(
res
[
"output_throughput"
],
1000
)
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
1050
)
def
test_offline_throughput_without_radix_cache
(
self
):
res
=
run_bench_serving
(
...
...
test/srt/test_intel_amx_attention_backend.py
View file @
2c7f01bc
...
...
@@ -70,7 +70,7 @@ class TestIntelAMXAttnBackend(CustomTestCase):
)
metrics
=
run_eval
(
args
)
self
.
assertGreater
(
metrics
[
"score"
],
0.5
)
self
.
assertGreater
(
metrics
[
"score"
],
0.
4
5
)
finally
:
kill_process_tree
(
process
.
pid
)
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment