Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
61055cb3
Unverified
Commit
61055cb3
authored
Oct 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Oct 10, 2025
Browse files
Reorder PD disagg CI tests (#11438)
parent
92777135
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
401 additions
and
151 deletions
+401
-151
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+0
-82
python/sglang/srt/utils/common.py
python/sglang/srt/utils/common.py
+1
-1
scripts/sort_testcases_alphabetically.py
scripts/sort_testcases_alphabetically.py
+339
-0
test/srt/models/test_nvidia_nemotron_nano_v2.py
test/srt/models/test_nvidia_nemotron_nano_v2.py
+5
-0
test/srt/run_suite.py
test/srt/run_suite.py
+56
-64
test/srt/test_disaggregation_basic.py
test/srt/test_disaggregation_basic.py
+0
-1
test/srt/test_disaggregation_dp_attention.py
test/srt/test_disaggregation_dp_attention.py
+0
-3
No files found.
.github/workflows/pr-test.yml
View file @
61055cb3
...
@@ -693,87 +693,6 @@ jobs:
...
@@ -693,87 +693,6 @@ jobs:
cd test/srt
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
unit-test-disaggregation-2-gpu
:
needs
:
[
check-changes
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
2-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-2-gpu-disaggregation
unit-test-disaggregation-4-gpu
:
needs
:
[
check-changes
,
unit-test-disaggregation-2-gpu
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
4-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu-disaggregation
unit-test-disaggregation-8-gpu
:
needs
:
[
check-changes
,
unit-test-disaggregation-2-gpu
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
8-gpu-h200
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-disaggregation
pr-test-finish
:
pr-test-finish
:
needs
:
[
needs
:
[
check-changes
,
check-changes
,
...
@@ -788,7 +707,6 @@ jobs:
...
@@ -788,7 +707,6 @@ jobs:
accuracy-test-1-gpu
,
accuracy-test-2-gpu
,
accuracy-test-1-gpu
,
accuracy-test-2-gpu
,
unit-test-deepep-4-gpu
,
unit-test-deepep-8-gpu
,
unit-test-deepep-4-gpu
,
unit-test-deepep-8-gpu
,
unit-test-backend-4-gpu-b200
,
unit-test-backend-4-gpu-b200
,
unit-test-disaggregation-2-gpu
,
unit-test-disaggregation-4-gpu
,
unit-test-disaggregation-8-gpu
,
]
]
if
:
always()
if
:
always()
runs-on
:
ubuntu-latest
runs-on
:
ubuntu-latest
...
...
python/sglang/srt/utils/common.py
View file @
61055cb3
...
@@ -523,7 +523,7 @@ def make_layers_non_pp(
...
@@ -523,7 +523,7 @@ def make_layers_non_pp(
layer_fn
:
LayerFn
,
layer_fn
:
LayerFn
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
)
->
torch
.
nn
.
ModuleList
:
)
->
torch
.
nn
.
ModuleList
:
from
sglang.srt.offloader
import
get_offloader
from
sglang.srt.
utils.
offloader
import
get_offloader
layers
=
torch
.
nn
.
ModuleList
(
layers
=
torch
.
nn
.
ModuleList
(
get_offloader
().
wrap_modules
(
get_offloader
().
wrap_modules
(
...
...
scripts/sort_testcases_alphabetically.py
0 → 100644
View file @
61055cb3
"""
Sort the test cases in each suite alphabetically by name for run_suite.py.
"""
from
dataclasses
import
dataclass
@dataclass
class TestFile:
    """A single test-file entry of a suite together with its time estimate."""

    # Name of the test file as written in the suite tables,
    # e.g. "lora/test_lora.py".
    name: str
    # Estimated run time of the file; entries that omit it get 60.
    # NOTE(review): the unit is presumably seconds -- confirm against run_suite.py.
    estimated_time: float = 60
# Maps a suite name to the list of TestFile entries that belong to it.
# The second positional argument of each entry is its estimated_time.
# The lists are kept in their existing (not necessarily alphabetical) order;
# the __main__ block at the bottom of this script prints them sorted by name.
suites = {
    "per-commit": [
        TestFile("function_call/test_json_schema_constraint.py", 30),
        TestFile("hicache/test_hicache.py", 116),
        TestFile("hicache/test_hicache_eagle.py", 150),
        TestFile("hicache/test_hicache_mla.py", 127),
        TestFile("hicache/test_hicache_storage.py", 127),
        TestFile("lora/test_lora.py", 200),
        TestFile("lora/test_lora_backend.py", 99),
        TestFile("lora/test_lora_eviction.py", 200),
        TestFile("lora/test_lora_qwen3.py", 97),
        TestFile("lora/test_lora_radix_cache.py", 100),
        TestFile("lora/test_lora_update.py", 400),
        TestFile("lora/test_multi_lora_backend.py", 60),
        TestFile("models/test_embedding_models.py", 73),
        TestFile("models/test_encoder_embedding_models.py", 100),
        TestFile("models/test_cross_encoder_models.py", 100),
        TestFile("models/test_compressed_tensors_models.py", 42),
        TestFile("models/test_generation_models.py", 103),
        TestFile("models/test_nvidia_nemotron_nano_v2.py", 180),
        TestFile("models/test_qwen_models.py", 82),
        TestFile("models/test_reward_models.py", 132),
        TestFile("models/test_transformers_models.py", 320),
        TestFile("models/test_vlm_models.py", 741),
        TestFile("openai_server/basic/test_protocol.py", 10),
        TestFile("openai_server/basic/test_serving_chat.py", 10),
        TestFile("openai_server/basic/test_serving_completions.py", 10),
        TestFile("openai_server/basic/test_serving_embedding.py", 10),
        TestFile("openai_server/basic/test_openai_embedding.py", 141),
        TestFile("openai_server/basic/test_openai_server.py", 149),
        TestFile("openai_server/features/test_enable_thinking.py", 70),
        TestFile("openai_server/features/test_json_constrained.py", 98),
        TestFile("openai_server/features/test_json_mode.py", 90),
        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
        TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
        TestFile("openai_server/features/test_reasoning_content.py", 89),
        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
        TestFile("openai_server/function_call/test_tool_choice.py", 226),
        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
        TestFile("openai_server/validation/test_matched_stop.py", 60),
        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
        TestFile("openai_server/validation/test_request_length_validation.py", 31),
        TestFile("quant/test_block_int8.py", 22),
        TestFile("quant/test_fp8_kernel.py", 8),
        TestFile("quant/test_int8_kernel.py", 8),
        TestFile("quant/test_triton_scaled_mm.py", 8),
        TestFile("quant/test_w8a8_quantization.py", 46),
        TestFile("rl/test_fp32_lm_head.py", 30),
        TestFile("rl/test_update_weights_from_disk.py", 114),
        TestFile("rl/test_update_weights_from_tensor.py", 48),
        TestFile("test_abort.py", 51),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_chunked_prefill.py", 313),
        TestFile("test_deterministic.py", 300),
        TestFile("test_eagle_infer_a.py", 370),
        TestFile("test_eagle_infer_b.py", 700),
        TestFile("test_ebnf_constrained.py", 108),
        TestFile("test_eval_fp8_accuracy.py", 303),
        TestFile("test_fa3.py", 376),
        # TestFile("test_flashmla.py", 352),
        TestFile("test_function_call_parser.py", 10),
        TestFile("test_fused_moe.py", 30),
        TestFile("test_gpt_oss_1gpu.py", 600),
        TestFile("test_harmony_parser.py", 20),
        TestFile("test_hidden_states.py", 55),
        TestFile("test_hybrid_attn_backend.py", 100),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
        TestFile("test_logprobs.py", 55),
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 167),
        TestFile("test_mla_deepseek_v3.py", 500),
        TestFile("test_mla_int8_deepseek_v3.py", 429),
        TestFile("test_mla_flashinfer.py", 302),
        TestFile("test_mla_fp8.py", 93),
        TestFile("test_modelopt_loader.py", 30),
        TestFile("test_multi_tokenizer.py", 230),
        TestFile("test_ngram_speculative_decoding.py", 250),
        TestFile("test_no_chunked_prefill.py", 108),
        TestFile("test_no_overlap_scheduler.py", 234),
        TestFile("test_original_logprobs.py", 41),
        TestFile("test_penalty.py", 41),
        TestFile("test_page_size.py", 60),
        TestFile("test_priority_scheduling.py", 100),
        TestFile("test_pytorch_sampling_backend.py", 66),
        TestFile("test_radix_attention.py", 105),
        TestFile("test_radix_cache_unit.py", 5),
        TestFile("test_regex_constrained.py", 64),
        TestFile("test_reasoning_parser.py", 5),
        TestFile("test_retract_decode.py", 54),
        TestFile("test_request_queue_validation.py", 30),
        TestFile("test_score_api.py", 180),
        TestFile("test_server_args.py", 1),
        TestFile("test_skip_tokenizer_init.py", 117),
        TestFile("test_srt_engine.py", 261),
        TestFile("test_srt_endpoint.py", 130),
        TestFile("test_standalone_speculative_decoding.py", 250),
        TestFile("test_start_profile.py", 60),
        TestFile("test_swa_unittest.py", 1),
        TestFile("test_torch_compile.py", 76),
        TestFile("test_torch_compile_moe.py", 172),
        TestFile("test_torch_native_attention_backend.py", 123),
        TestFile("test_torchao.py", 70),
        TestFile("test_triton_attention_kernels.py", 4),
        TestFile("test_triton_attention_backend.py", 150),
        TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
        TestFile("test_triton_sliding_window.py", 250),
        TestFile("test_utils_update_weights.py", 48),
        TestFile("test_vision_chunked_prefill.py", 175),
        TestFile("test_vlm_input_format.py", 300),
        TestFile("test_vision_openai_server_a.py", 724),
        TestFile("test_vision_openai_server_b.py", 446),
    ],
    "per-commit-2-gpu": [
        TestFile("ep/test_moe_ep.py", 140),
        TestFile("hicache/test_hicache_storage_file_backend.py", 200),
        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
        TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
        TestFile("layers/attention/mamba/test_mamba2_mixer.py", 110),
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
        TestFile("test_disaggregation_basic.py", 400),
        TestFile("test_dp_attention.py", 594),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        TestFile("test_patch_torch.py", 19),
        TestFile("test_release_memory_occupation.py", 257),
    ],
    "per-commit-4-gpu": [
        TestFile("models/test_qwen3_next_models.py", 291),
        TestFile("test_disaggregation_dp_attention.py", 155),
        TestFile("test_gpt_oss_4gpu.py", 300),
        TestFile("test_local_attn.py", 411),
        TestFile("test_multi_instance_release_memory_occupation.py", 64),
        TestFile("test_pp_single_node.py", 481),
    ],
    "per-commit-8-gpu": [
        TestFile("lora/test_lora_llama4.py", 400),
        TestFile("test_deepseek_v3_basic.py", 275),
        TestFile("test_deepseek_v3_mtp.py", 275),
        TestFile("test_disaggregation_different_tp.py", 600),
        TestFile("test_disaggregation_pp.py", 140),
    ],
    # This suite currently has no active entries; both candidates are commented out.
    "per-commit-4-gpu-b200": [
        # TestFile("test_gpt_oss_4gpu.py", 600),
        # TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
    ],
    "per-commit-4-gpu-deepep": [
        TestFile("ep/test_deepep_small.py", 531),
    ],
    "per-commit-8-gpu-deepep": [
        TestFile("ep/test_deepep_large.py", 338),
    ],
    "per-commit-8-gpu-h20": [
        TestFile("quant/test_w4a8_deepseek_v3.py", 371),
    ],
    "vllm_dependency_test": [
        TestFile("quant/test_awq.py", 163),
        TestFile("test_bnb.py", 5),
        TestFile("test_gptqmodel_dynamic.py", 102),
        TestFile("test_vllm_dependency.py", 185),
        # TestFile("test_gguf.py", 96),
    ],
}
# Add AMD tests
# NOTE: please sort the test cases alphabetically by the test file name
# NOTE(review): several lists below are not in alphabetical order as written
# (e.g. "lora/test_lora_eviction.py" precedes "lora/test_lora_backend.py").
# Commented-out entries are not part of the lists, so the __main__ block does
# not print them.
suite_amd = {
    "per-commit-amd": [
        TestFile("hicache/test_hicache.py", 116),
        TestFile("hicache/test_hicache_mla.py", 127),
        TestFile("hicache/test_hicache_storage.py", 127),
        TestFile("lora/test_lora.py", 200),
        TestFile("lora/test_lora_eviction.py", 200),
        TestFile("lora/test_lora_backend.py", 99),
        TestFile("lora/test_multi_lora_backend.py", 60),
        TestFile("lora/test_lora_cuda_graph.py", 250),
        TestFile("lora/test_lora_qwen3.py", 97),
        # TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
        TestFile("models/test_compressed_tensors_models.py", 42),
        TestFile("models/test_qwen_models.py", 82),
        TestFile("models/test_reward_models.py", 132),
        TestFile("models/test_transformers_models.py", 320),
        TestFile("openai_server/basic/test_protocol.py", 10),
        TestFile("openai_server/basic/test_serving_chat.py", 10),
        TestFile("openai_server/basic/test_serving_completions.py", 10),
        TestFile("openai_server/basic/test_serving_embedding.py", 10),
        TestFile("openai_server/basic/test_openai_embedding.py", 141),
        TestFile("openai_server/basic/test_openai_server.py", 149),
        TestFile("openai_server/features/test_enable_thinking.py", 70),
        TestFile("openai_server/features/test_json_constrained.py", 98),
        TestFile("openai_server/features/test_json_mode.py", 90),
        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
        # TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
        TestFile("openai_server/features/test_reasoning_content.py", 89),
        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
        TestFile("openai_server/function_call/test_tool_choice.py", 226),
        TestFile("function_call/test_json_schema_constraint.py", 30),
        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
        TestFile("openai_server/validation/test_matched_stop.py", 60),
        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
        TestFile("openai_server/validation/test_request_length_validation.py", 31),
        TestFile("quant/test_block_int8.py", 22),
        TestFile("quant/test_awq_dequant.py", 2),
        TestFile("rl/test_update_weights_from_disk.py", 114),
        # TestFile("rl/test_update_weights_from_tensor.py", 48),
        TestFile("test_abort.py", 51),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_chunked_prefill.py", 313),
        TestFile("test_ebnf_constrained.py", 108),
        TestFile("test_eval_fp8_accuracy.py", 303),
        TestFile("test_function_call_parser.py", 10),
        TestFile("test_fused_moe.py", 30),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 242),
        TestFile("test_mla_deepseek_v3.py", 221),
        TestFile("test_no_chunked_prefill.py", 108),
        # TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
        TestFile("test_penalty.py", 41),
        TestFile("test_page_size.py", 60),
        TestFile("test_pytorch_sampling_backend.py", 66),
        TestFile("test_radix_attention.py", 105),
        TestFile("test_regex_constrained.py", 64),
        TestFile("test_retract_decode.py", 54),
        TestFile("test_reasoning_parser.py", 5),
        TestFile("test_rope_rocm.py", 3),
        TestFile("test_server_args.py", 1),
        TestFile("test_skip_tokenizer_init.py", 117),
        TestFile("test_srt_engine.py", 261),
        TestFile("test_srt_endpoint.py", 130),
        TestFile("test_torch_compile.py", 169),
        TestFile("test_torch_compile_moe.py", 172),
        TestFile("test_torch_native_attention_backend.py", 123),
        TestFile("test_triton_attention_backend.py", 150),
        # TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
        TestFile("test_wave_attention_kernels.py", 2),
        # TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
    ],
    "per-commit-amd-mi35x": [
        TestFile("test_mla.py", 242),
        TestFile("test_gpt_oss_1gpu.py", 600),
    ],
    "per-commit-2-gpu-amd": [
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        # TestFile("test_patch_torch.py", 19), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
    ],
    "per-commit-4-gpu-amd": [
        TestFile("test_pp_single_node.py", 150),
    ],
    "per-commit-8-gpu-amd": [
        TestFile("test_deepseek_v3_basic.py", 275),
        TestFile("test_deepseek_v3_mtp.py", 275),
    ],
    "nightly-amd": [
        # No estimate given, so this entry uses TestFile's default estimated_time.
        TestFile("test_nightly_gsm8k_eval_amd.py"),
    ],
}
# Add Intel Xeon tests
# NOTE: please sort the test cases alphabetically by the test file name
# NOTE(review): the last two entries are out of alphabetical order as written
# ("test_intel_amx_attention_backend.py" precedes "test_cpu_graph.py").
# None of these entries passes an estimate, so each one uses TestFile's
# default estimated_time.
suite_xeon = {
    "per-commit-cpu": [
        TestFile("cpu/test_activation.py"),
        TestFile("cpu/test_binding.py"),
        TestFile("cpu/test_decode.py"),
        TestFile("cpu/test_extend.py"),
        TestFile("cpu/test_gemm.py"),
        TestFile("cpu/test_mla.py"),
        TestFile("cpu/test_moe.py"),
        TestFile("cpu/test_norm.py"),
        TestFile("cpu/test_qkv_proj_with_rope.py"),
        TestFile("cpu/test_rope.py"),
        TestFile("cpu/test_shared_expert.py"),
        TestFile("cpu/test_topk.py"),
        TestFile("test_intel_amx_attention_backend.py"),
        TestFile("test_cpu_graph.py"),
    ],
}
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
# Maps each Ascend suite name to its TestFile entries; every entry here
# carries an explicit estimated_time of 400.
suite_ascend = {
    "per-commit-1-ascend-npu": [
        TestFile("ascend/test_ascend_graph_tp1_bf16.py", 400),
        TestFile("ascend/test_ascend_tp1_bf16.py", 400),
    ],
    "per-commit-2-ascend-npu": [
        TestFile("ascend/test_ascend_graph_tp2_bf16.py", 400),
        TestFile("ascend/test_ascend_mla_fia_w8a8int8.py", 400),
        TestFile("ascend/test_ascend_tp2_bf16.py", 400),
        TestFile("ascend/test_ascend_tp2_fia_bf16.py", 400),
    ],
    "per-commit-4-ascend-npu": [
        TestFile("ascend/test_ascend_mla_w8a8int8.py", 400),
        TestFile("ascend/test_ascend_tp4_bf16.py", 400),
    ],
    "per-commit-16-ascend-a3": [
        TestFile("ascend/test_ascend_deepep.py", 400),
    ],
}
# Fold the AMD, Xeon and Ascend tables into the main table in that order.
# On a key clash the later table wins, exactly as with three separate updates.
suites.update({**suite_amd, **suite_xeon, **suite_ascend})
def format_suite(key, cases):
    """Render one suite as a ``"<key>": [...]`` snippet with entries sorted by name.

    Args:
        key: Suite name, emitted as the quoted dictionary key.
        cases: Iterable of objects exposing ``name`` and ``estimated_time``.

    Returns:
        A single string: the header line, one ``TestFile(...)`` line per case
        in ascending ``name`` order, and a closing ``],`` line ending in a
        newline, so that printing the result leaves a blank separator line.
    """
    # NOTE(review): the indent widths inside these literals (4 and 8 spaces)
    # are assumed to match the layout expected by run_suite.py -- confirm.
    lines = [f'    "{key}": [']
    # Sort the entries themselves instead of sorting the names and looking
    # each time up again: one O(n log n) pass, and because the sort is stable
    # every duplicated name keeps its own estimated_time.
    lines.extend(
        f'        TestFile("{case.name}", {case.estimated_time}),'
        for case in sorted(cases, key=lambda case: case.name)
    )
    lines.append("    ],\n")
    return "\n".join(lines)


if __name__ == "__main__":
    # Print every suite so the output can be pasted back into run_suite.py.
    for key, cases in suites.items():
        print(format_suite(key, cases))
test/srt/models/test_nvidia_nemotron_nano_v2.py
View file @
61055cb3
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_process_tree
from
sglang.srt.utils
import
kill_process_tree
...
@@ -42,3 +43,7 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
...
@@ -42,3 +43,7 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
metrics
=
run_eval
(
args
)
metrics
=
run_eval
(
args
)
print
(
f
"
{
metrics
=
}
"
)
print
(
f
"
{
metrics
=
}
"
)
self
.
assertGreater
(
metrics
[
"accuracy"
],
0.87
)
self
.
assertGreater
(
metrics
[
"accuracy"
],
0.87
)
if
__name__
==
"__main__"
:
unittest
.
main
()
test/srt/run_suite.py
View file @
61055cb3
...
@@ -2,7 +2,6 @@ import argparse
...
@@ -2,7 +2,6 @@ import argparse
import
glob
import
glob
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
sglang.srt.utils
import
is_hip
from
sglang.test.test_utils
import
run_unittest_files
from
sglang.test.test_utils
import
run_unittest_files
...
@@ -12,38 +11,37 @@ class TestFile:
...
@@ -12,38 +11,37 @@ class TestFile:
estimated_time
:
float
=
60
estimated_time
:
float
=
60
# NOTE: please sort the test cases alphabetically by the test file name
suites
=
{
suites
=
{
"per-commit"
:
[
"per-commit"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_radix_cache.py"
,
100
),
TestFile
(
"lora/test_lora_radix_cache.py"
,
100
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
# TestFile("models/test_clip_models.py", 52),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_generation_models.py"
,
103
),
TestFile
(
"models/test_generation_models.py"
,
103
),
# TestFile("models/test_gme_qwen_models.py", 45),
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
# TestFile("models/test_grok_models.py", 60), # Disabled due to illegal memory access
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
...
@@ -65,8 +63,8 @@ suites = {
...
@@ -65,8 +63,8 @@ suites = {
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_deterministic.py"
,
300
),
TestFile
(
"test_deterministic.py"
,
300
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
...
@@ -80,8 +78,6 @@ suites = {
...
@@ -80,8 +78,6 @@ suites = {
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
...
@@ -90,74 +86,76 @@ suites = {
...
@@ -90,74 +86,76 @@ suites = {
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
167
),
TestFile
(
"test_mla.py"
,
167
),
TestFile
(
"test_mla_deepseek_v3.py"
,
500
),
TestFile
(
"test_mla_deepseek_v3.py"
,
500
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_mla_flashinfer.py"
,
302
),
TestFile
(
"test_mla_flashinfer.py"
,
302
),
TestFile
(
"test_mla_fp8.py"
,
93
),
TestFile
(
"test_mla_fp8.py"
,
93
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
TestFile
(
"test_multi_tokenizer.py"
,
230
),
TestFile
(
"test_multi_tokenizer.py"
,
230
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_overlap_scheduler.py"
,
234
),
TestFile
(
"test_no_overlap_scheduler.py"
,
234
),
TestFile
(
"test_original_logprobs.py"
,
41
),
TestFile
(
"test_original_logprobs.py"
,
41
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_priority_scheduling.py"
,
100
),
TestFile
(
"test_priority_scheduling.py"
,
100
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_cache_unit.py"
,
5
),
TestFile
(
"test_radix_cache_unit.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_re
tract_decode
.py"
,
5
4
),
TestFile
(
"test_re
gex_constrained
.py"
,
6
4
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_score_api.py"
,
180
),
TestFile
(
"test_score_api.py"
,
180
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_swa_unittest.py"
,
1
),
TestFile
(
"test_swa_unittest.py"
,
1
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
TestFile
(
"layers/attention/mamba/test_causal_conv1d.py"
,
85
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"layers/attention/mamba/test_mamba_ssm.py"
,
85
),
TestFile
(
"layers/attention/mamba/test_mamba_ssm_ssd.py"
,
220
),
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
],
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
TestFile
(
"ep/test_moe_ep.py"
,
140
),
TestFile
(
"ep/test_moe_ep.py"
,
140
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_disaggregation_basic.py"
,
400
),
TestFile
(
"test_dp_attention.py"
,
594
),
TestFile
(
"test_dp_attention.py"
,
594
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_release_memory_occupation.py"
,
257
),
TestFile
(
"test_release_memory_occupation.py"
,
257
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
],
],
"per-commit-4-gpu"
:
[
"per-commit-4-gpu"
:
[
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
300
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
300
),
TestFile
(
"test_local_attn.py"
,
411
),
TestFile
(
"test_local_attn.py"
,
411
),
TestFile
(
"test_pp_single_node.py"
,
481
),
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_multi_instance_release_memory_occupation.py"
,
64
),
TestFile
(
"test_multi_instance_release_memory_occupation.py"
,
64
),
TestFile
(
"test_pp_single_node.py"
,
481
),
],
],
"per-commit-8-gpu"
:
[
"per-commit-8-gpu"
:
[
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
],
"per-commit-4-gpu-b200"
:
[
"per-commit-4-gpu-b200"
:
[
# TestFile("test_gpt_oss_4gpu.py", 600),
# TestFile("test_gpt_oss_4gpu.py", 600),
...
@@ -169,16 +167,6 @@ suites = {
...
@@ -169,16 +167,6 @@ suites = {
"per-commit-8-gpu-deepep"
:
[
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_large.py"
,
338
),
TestFile
(
"ep/test_deepep_large.py"
,
338
),
],
],
"per-commit-2-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_basic.py"
,
400
),
],
"per-commit-4-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
],
"per-commit-8-gpu-disaggregation"
:
[
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-8-gpu-h20"
:
[
"per-commit-8-gpu-h20"
:
[
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
],
],
...
@@ -192,48 +180,46 @@ suites = {
...
@@ -192,48 +180,46 @@ suites = {
}
}
# Add AMD tests
# Add AMD tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_amd
=
{
suite_amd
=
{
"per-commit-amd"
:
[
"per-commit-amd"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
#
TestFile("
models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
TestFile
(
"
lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_function_call_parser.py"
,
10
),
...
@@ -246,30 +232,34 @@ suite_amd = {
...
@@ -246,30 +232,34 @@ suite_amd = {
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_torch_compile.py"
,
169
),
TestFile
(
"test_torch_compile.py"
,
169
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_wave_attention_kernels.py"
,
2
),
TestFile
(
"test_wave_attention_kernels.py"
,
2
),
# Disabled temporarily
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
],
"per-commit-amd-mi35x"
:
[
"per-commit-amd-mi35x"
:
[
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_mla.py"
,
242
),
],
],
"per-commit-2-gpu-amd"
:
[
"per-commit-2-gpu-amd"
:
[
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
...
@@ -291,6 +281,7 @@ suite_amd = {
...
@@ -291,6 +281,7 @@ suite_amd = {
}
}
# Add Intel Xeon tests
# Add Intel Xeon tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_xeon
=
{
suite_xeon
=
{
"per-commit-cpu"
:
[
"per-commit-cpu"
:
[
TestFile
(
"cpu/test_activation.py"
),
TestFile
(
"cpu/test_activation.py"
),
...
@@ -305,22 +296,23 @@ suite_xeon = {
...
@@ -305,22 +296,23 @@ suite_xeon = {
TestFile
(
"cpu/test_rope.py"
),
TestFile
(
"cpu/test_rope.py"
),
TestFile
(
"cpu/test_shared_expert.py"
),
TestFile
(
"cpu/test_shared_expert.py"
),
TestFile
(
"cpu/test_topk.py"
),
TestFile
(
"cpu/test_topk.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
TestFile
(
"test_cpu_graph.py"
),
TestFile
(
"test_cpu_graph.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
],
],
}
}
# Add Ascend NPU tests
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend
=
{
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
"per-commit-1-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
],
],
"per-commit-2-ascend-npu"
:
[
"per-commit-2-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_graph_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_fia_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_fia_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
],
],
"per-commit-4-ascend-npu"
:
[
"per-commit-4-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_mla_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_w8a8int8.py"
,
400
),
...
...
test/srt/test_disaggregation_basic.py
View file @
61055cb3
import
json
import
json
import
os
import
os
import
time
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
...
...
test/srt/test_disaggregation_dp_attention.py
View file @
61055cb3
import
os
import
os
import
time
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
from
urllib.parse
import
urlparse
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
from
sglang.test.test_disaggregation_utils
import
TestDisaggregationBase
from
sglang.test.test_disaggregation_utils
import
TestDisaggregationBase
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST_MLA
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_pd_server
,
popen_launch_pd_server
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment