Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
61055cb3
Unverified
Commit
61055cb3
authored
Oct 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Oct 10, 2025
Browse files
Reorder PD disagg CI tests (#11438)
parent
92777135
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
401 additions
and
151 deletions
+401
-151
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+0
-82
python/sglang/srt/utils/common.py
python/sglang/srt/utils/common.py
+1
-1
scripts/sort_testcases_alphabetically.py
scripts/sort_testcases_alphabetically.py
+339
-0
test/srt/models/test_nvidia_nemotron_nano_v2.py
test/srt/models/test_nvidia_nemotron_nano_v2.py
+5
-0
test/srt/run_suite.py
test/srt/run_suite.py
+56
-64
test/srt/test_disaggregation_basic.py
test/srt/test_disaggregation_basic.py
+0
-1
test/srt/test_disaggregation_dp_attention.py
test/srt/test_disaggregation_dp_attention.py
+0
-3
No files found.
.github/workflows/pr-test.yml
View file @
61055cb3
...
...
@@ -693,87 +693,6 @@ jobs:
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
unit-test-disaggregation-2-gpu
:
needs
:
[
check-changes
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
2-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-2-gpu-disaggregation
unit-test-disaggregation-4-gpu
:
needs
:
[
check-changes
,
unit-test-disaggregation-2-gpu
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
4-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu-disaggregation
unit-test-disaggregation-8-gpu
:
needs
:
[
check-changes
,
unit-test-disaggregation-2-gpu
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
8-gpu-h200
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-disaggregation
pr-test-finish
:
needs
:
[
check-changes
,
...
...
@@ -788,7 +707,6 @@ jobs:
accuracy-test-1-gpu
,
accuracy-test-2-gpu
,
unit-test-deepep-4-gpu
,
unit-test-deepep-8-gpu
,
unit-test-backend-4-gpu-b200
,
unit-test-disaggregation-2-gpu
,
unit-test-disaggregation-4-gpu
,
unit-test-disaggregation-8-gpu
,
]
if
:
always()
runs-on
:
ubuntu-latest
...
...
python/sglang/srt/utils/common.py
View file @
61055cb3
...
...
@@ -523,7 +523,7 @@ def make_layers_non_pp(
layer_fn
:
LayerFn
,
prefix
:
str
=
""
,
)
->
torch
.
nn
.
ModuleList
:
from
sglang.srt.offloader
import
get_offloader
from
sglang.srt.
utils.
offloader
import
get_offloader
layers
=
torch
.
nn
.
ModuleList
(
get_offloader
().
wrap_modules
(
...
...
scripts/sort_testcases_alphabetically.py
0 → 100644
View file @
61055cb3
"""
Sort the test case by name alphabetically for run_suite.py
"""
from
dataclasses
import
dataclass
@dataclass
class TestFile:
    """One test file entry of a suite: its file name and an estimated run time."""

    # File name of the test as listed in a suite, e.g. "lora/test_lora.py".
    name: str
    # Estimated run time; falls back to 60 when an entry does not give one.
    # NOTE(review): the unit is not stated in this file (presumably seconds) -- confirm.
    estimated_time: float = 60
suites
=
{
"per-commit"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_radix_cache.py"
,
100
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_generation_models.py"
,
103
),
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
TestFile
(
"openai_server/features/test_openai_server_hidden_states.py"
,
240
),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_fp8_kernel.py"
,
8
),
TestFile
(
"quant/test_int8_kernel.py"
,
8
),
TestFile
(
"quant/test_triton_scaled_mm.py"
,
8
),
TestFile
(
"quant/test_w8a8_quantization.py"
,
46
),
TestFile
(
"rl/test_fp32_lm_head.py"
,
30
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_deterministic.py"
,
300
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_fa3.py"
,
376
),
# TestFile("test_flashmla.py", 352),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_logprobs.py"
,
55
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
167
),
TestFile
(
"test_mla_deepseek_v3.py"
,
500
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_mla_flashinfer.py"
,
302
),
TestFile
(
"test_mla_fp8.py"
,
93
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
TestFile
(
"test_multi_tokenizer.py"
,
230
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_overlap_scheduler.py"
,
234
),
TestFile
(
"test_original_logprobs.py"
,
41
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_priority_scheduling.py"
,
100
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_cache_unit.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_score_api.py"
,
180
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_swa_unittest.py"
,
1
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
],
"per-commit-2-gpu"
:
[
TestFile
(
"ep/test_moe_ep.py"
,
140
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_disaggregation_basic.py"
,
400
),
TestFile
(
"test_dp_attention.py"
,
594
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_release_memory_occupation.py"
,
257
),
],
"per-commit-4-gpu"
:
[
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
300
),
TestFile
(
"test_local_attn.py"
,
411
),
TestFile
(
"test_multi_instance_release_memory_occupation.py"
,
64
),
TestFile
(
"test_pp_single_node.py"
,
481
),
],
"per-commit-8-gpu"
:
[
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-4-gpu-b200"
:
[
# TestFile("test_gpt_oss_4gpu.py", 600),
# TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
],
"per-commit-4-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_small.py"
,
531
),
],
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_large.py"
,
338
),
],
"per-commit-8-gpu-h20"
:
[
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
],
"vllm_dependency_test"
:
[
TestFile
(
"quant/test_awq.py"
,
163
),
TestFile
(
"test_bnb.py"
,
5
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
102
),
TestFile
(
"test_vllm_dependency.py"
,
185
),
# TestFile("test_gguf.py", 96),
],
}
# Add AMD tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_amd
=
{
"per-commit-amd"
:
[
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_torch_compile.py"
,
169
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_wave_attention_kernels.py"
,
2
),
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-amd-mi35x"
:
[
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
],
"per-commit-2-gpu-amd"
:
[
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
# TestFile("test_patch_torch.py", 19), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-4-gpu-amd"
:
[
TestFile
(
"test_pp_single_node.py"
,
150
),
],
"per-commit-8-gpu-amd"
:
[
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
],
"nightly-amd"
:
[
TestFile
(
"test_nightly_gsm8k_eval_amd.py"
),
],
}
# Add Intel Xeon tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_xeon
=
{
"per-commit-cpu"
:
[
TestFile
(
"cpu/test_activation.py"
),
TestFile
(
"cpu/test_binding.py"
),
TestFile
(
"cpu/test_decode.py"
),
TestFile
(
"cpu/test_extend.py"
),
TestFile
(
"cpu/test_gemm.py"
),
TestFile
(
"cpu/test_mla.py"
),
TestFile
(
"cpu/test_moe.py"
),
TestFile
(
"cpu/test_norm.py"
),
TestFile
(
"cpu/test_qkv_proj_with_rope.py"
),
TestFile
(
"cpu/test_rope.py"
),
TestFile
(
"cpu/test_shared_expert.py"
),
TestFile
(
"cpu/test_topk.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
TestFile
(
"test_cpu_graph.py"
),
],
}
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_graph_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
],
"per-commit-2-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_graph_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_fia_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
],
"per-commit-4-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_mla_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp4_bf16.py"
,
400
),
],
"per-commit-16-ascend-a3"
:
[
TestFile
(
"ascend/test_ascend_deepep.py"
,
400
),
],
}
# Fold the platform-specific suite tables into the main mapping, in the same
# order as before: AMD, then Intel Xeon, then Ascend NPU.
for extra_suites in (suite_amd, suite_xeon, suite_ascend):
    suites.update(extra_suites)
def format_suite(key, cases):
    """Return the alphabetically sorted listing of one suite as source text.

    Args:
        key: Suite name, emitted as the quoted dictionary key.
        cases: Iterable of entries exposing ``name`` and ``estimated_time``.

    Returns:
        The suite header line, one ``TestFile(...)`` line per entry ordered by
        ``name``, and the closing bracket line, joined by newlines.
    """
    # Sort the entries themselves instead of only their names: every entry
    # keeps its own estimated time (even if a file name is listed twice) and
    # the list no longer has to be rescanned once per name.
    ordered = sorted(cases, key=lambda case: case.name)
    lines = [f'    "{key}": [']
    lines.extend(
        f'        TestFile("{case.name}", {case.estimated_time}),'
        for case in ordered
    )
    # The trailing newline leaves a blank line between suites once printed.
    lines.append("    ],\n")
    return "\n".join(lines)


if __name__ == "__main__":
    # Print every suite so the output can be pasted back into run_suite.py.
    for key, cases in suites.items():
        print(format_suite(key, cases))
test/srt/models/test_nvidia_nemotron_nano_v2.py
View file @
61055cb3
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_process_tree
...
...
@@ -42,3 +43,7 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
metrics
=
run_eval
(
args
)
print
(
f
"
{
metrics
=
}
"
)
self
.
assertGreater
(
metrics
[
"accuracy"
],
0.87
)
if
__name__
==
"__main__"
:
unittest
.
main
()
test/srt/run_suite.py
View file @
61055cb3
...
...
@@ -2,7 +2,6 @@ import argparse
import
glob
from
dataclasses
import
dataclass
from
sglang.srt.utils
import
is_hip
from
sglang.test.test_utils
import
run_unittest_files
...
...
@@ -12,38 +11,37 @@ class TestFile:
estimated_time
:
float
=
60
# NOTE: please sort the test cases alphabetically by the test file name
suites
=
{
"per-commit"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_radix_cache.py"
,
100
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
# TestFile("models/test_clip_models.py", 52),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_generation_models.py"
,
103
),
# TestFile("models/test_gme_qwen_models.py", 45),
# TestFile("models/test_grok_models.py", 60), # Disabled due to illegal memory access
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
...
...
@@ -65,8 +63,8 @@ suites = {
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_deterministic.py"
,
300
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
...
...
@@ -80,8 +78,6 @@ suites = {
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
...
...
@@ -90,74 +86,76 @@ suites = {
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
167
),
TestFile
(
"test_mla_deepseek_v3.py"
,
500
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_mla_flashinfer.py"
,
302
),
TestFile
(
"test_mla_fp8.py"
,
93
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
TestFile
(
"test_multi_tokenizer.py"
,
230
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_overlap_scheduler.py"
,
234
),
TestFile
(
"test_original_logprobs.py"
,
41
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_priority_scheduling.py"
,
100
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_cache_unit.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_re
tract_decode
.py"
,
5
4
),
TestFile
(
"test_re
gex_constrained
.py"
,
6
4
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_score_api.py"
,
180
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_swa_unittest.py"
,
1
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
TestFile
(
"layers/attention/mamba/test_causal_conv1d.py"
,
85
),
TestFile
(
"layers/attention/mamba/test_mamba_ssm.py"
,
85
),
TestFile
(
"layers/attention/mamba/test_mamba_ssm_ssd.py"
,
220
),
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
],
"per-commit-2-gpu"
:
[
TestFile
(
"ep/test_moe_ep.py"
,
140
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_disaggregation_basic.py"
,
400
),
TestFile
(
"test_dp_attention.py"
,
594
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_release_memory_occupation.py"
,
257
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
],
"per-commit-4-gpu"
:
[
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
300
),
TestFile
(
"test_local_attn.py"
,
411
),
TestFile
(
"test_pp_single_node.py"
,
481
),
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_multi_instance_release_memory_occupation.py"
,
64
),
TestFile
(
"test_pp_single_node.py"
,
481
),
],
"per-commit-8-gpu"
:
[
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-4-gpu-b200"
:
[
# TestFile("test_gpt_oss_4gpu.py", 600),
...
...
@@ -169,16 +167,6 @@ suites = {
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_large.py"
,
338
),
],
"per-commit-2-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_basic.py"
,
400
),
],
"per-commit-4-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
],
"per-commit-8-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-8-gpu-h20"
:
[
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
],
...
...
@@ -192,48 +180,46 @@ suites = {
}
# Add AMD tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_amd
=
{
"per-commit-amd"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
#
TestFile("
models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
TestFile
(
"
lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
...
...
@@ -246,30 +232,34 @@ suite_amd = {
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_torch_compile.py"
,
169
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_wave_attention_kernels.py"
,
2
),
# Disabled temporarily
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-amd-mi35x"
:
[
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_mla.py"
,
242
),
],
"per-commit-2-gpu-amd"
:
[
TestFile
(
"lora/test_lora_tp.py"
,
116
),
...
...
@@ -291,6 +281,7 @@ suite_amd = {
}
# Add Intel Xeon tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_xeon
=
{
"per-commit-cpu"
:
[
TestFile
(
"cpu/test_activation.py"
),
...
...
@@ -305,22 +296,23 @@ suite_xeon = {
TestFile
(
"cpu/test_rope.py"
),
TestFile
(
"cpu/test_shared_expert.py"
),
TestFile
(
"cpu/test_topk.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
TestFile
(
"test_cpu_graph.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
],
}
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
],
"per-commit-2-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_fia_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
],
"per-commit-4-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_mla_w8a8int8.py"
,
400
),
...
...
test/srt/test_disaggregation_basic.py
View file @
61055cb3
import
json
import
os
import
time
import
unittest
from
types
import
SimpleNamespace
...
...
test/srt/test_disaggregation_dp_attention.py
View file @
61055cb3
import
os
import
time
import
unittest
from
types
import
SimpleNamespace
from
urllib.parse
import
urlparse
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
from
sglang.test.test_disaggregation_utils
import
TestDisaggregationBase
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST_MLA
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_pd_server
,
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment