Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
548a57b1
Unverified
Commit
548a57b1
authored
Oct 12, 2025
by
Lianmin Zheng
Committed by
GitHub
Oct 12, 2025
Browse files
Fix port conflicts in CI (#11497)
parent
88e73ed0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
34 additions
and
336 deletions
+34
-336
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+2
-2
python/pyproject.toml
python/pyproject.toml
+5
-4
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+2
-5
scripts/sort_testcases_alphabetically.py
scripts/sort_testcases_alphabetically.py
+1
-314
test/srt/run_suite.py
test/srt/run_suite.py
+1
-1
test/srt/test_mla_int8_deepseek_v3.py
test/srt/test_mla_int8_deepseek_v3.py
+23
-10
No files found.
.github/workflows/pr-test.yml
View file @
548a57b1
...
...
@@ -327,7 +327,7 @@ jobs:
strategy
:
fail-fast
:
false
matrix
:
part
:
[
0
,
1
,
2
]
part
:
[
0
,
1
]
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
...
...
@@ -348,7 +348,7 @@ jobs:
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size
3
python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size
2
performance-test-1-gpu-part-1
:
needs
:
[
check-changes
,
sgl-kernel-build-wheels
]
...
...
python/pyproject.toml
View file @
548a57b1
...
...
@@ -76,6 +76,7 @@ decord = ["decord"]
test
=
[
"accelerate"
,
"expecttest"
,
"gguf"
,
"jsonlines"
,
"matplotlib"
,
"pandas"
,
...
...
@@ -85,10 +86,10 @@ test = [
"tabulate"
,
]
tracing
=
[
"opentelemetry-api"
,
"opentelemetry-exporter-otlp"
,
"opentelemetry-exporter-otlp-proto-grpc"
,
"opentelemetry-sdk"
,
"opentelemetry-api"
,
"opentelemetry-exporter-otlp"
,
"opentelemetry-exporter-otlp-proto-grpc"
,
"opentelemetry-sdk"
,
]
all
=
["sglang[test]
", "
sglang
[decord]"]
all_aarch64
=
["sglang[test]"]
...
...
python/sglang/test/test_utils.py
View file @
548a57b1
...
...
@@ -135,11 +135,11 @@ def _use_cached_default_models(model_repo: str):
if
is_in_ci
():
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
(
5
000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
10
000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
0
)
else
:
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
(
7
000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
20
000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
0
)
DEFAULT_URL_FOR_TEST
=
f
"http://127.0.0.1:
{
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
+
1000
}
"
...
...
@@ -396,8 +396,6 @@ def _get_call_generate(args: argparse.Namespace):
return
partial
(
call_generate_vllm
,
url
=
f
"
{
args
.
host
}
:
{
args
.
port
}
/generate"
)
elif
args
.
backend
==
"srt-raw"
:
return
partial
(
call_generate_srt_raw
,
url
=
f
"
{
args
.
host
}
:
{
args
.
port
}
/generate"
)
elif
args
.
backend
==
"gserver"
:
return
partial
(
call_generate_gserver
,
url
=
f
"
{
args
.
host
}
:
{
args
.
port
}
"
)
elif
args
.
backend
==
"outlines"
:
return
partial
(
call_generate_outlines
,
url
=
f
"
{
args
.
host
}
:
{
args
.
port
}
/generate"
)
elif
args
.
backend
==
"guidance"
:
...
...
@@ -521,7 +519,6 @@ def popen_launch_server(
# Auto-detect device if needed
if
device
==
"auto"
:
device
=
auto_config_device
()
print
(
f
"Auto-configed device:
{
device
}
"
,
flush
=
True
)
other_args
=
list
(
other_args
)
other_args
+=
[
"--device"
,
str
(
device
)]
...
...
scripts/sort_testcases_alphabetically.py
View file @
548a57b1
...
...
@@ -11,320 +11,7 @@ class TestFile:
estimated_time
:
float
=
60
suites
=
{
"per-commit"
:
[
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_eagle.py"
,
150
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
TestFile
(
"lora/test_lora_radix_cache.py"
,
100
),
TestFile
(
"lora/test_lora_update.py"
,
400
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_embedding_models.py"
,
73
),
TestFile
(
"models/test_encoder_embedding_models.py"
,
100
),
TestFile
(
"models/test_cross_encoder_models.py"
,
100
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_generation_models.py"
,
103
),
TestFile
(
"models/test_nvidia_nemotron_nano_v2.py"
,
180
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"models/test_vlm_models.py"
,
741
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
TestFile
(
"openai_server/features/test_openai_server_hidden_states.py"
,
240
),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_fp8_kernel.py"
,
8
),
TestFile
(
"quant/test_int8_kernel.py"
,
8
),
TestFile
(
"quant/test_triton_scaled_mm.py"
,
8
),
TestFile
(
"quant/test_w8a8_quantization.py"
,
46
),
TestFile
(
"rl/test_fp32_lm_head.py"
,
30
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
TestFile
(
"rl/test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_deterministic.py"
,
300
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
TestFile
(
"test_eagle_infer_beta.py"
,
300
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_fa3.py"
,
376
),
# TestFile("test_flashmla.py", 352),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_logprobs.py"
,
55
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
167
),
TestFile
(
"test_mla_deepseek_v3.py"
,
500
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
429
),
TestFile
(
"test_mla_flashinfer.py"
,
302
),
TestFile
(
"test_mla_fp8.py"
,
93
),
TestFile
(
"test_modelopt_loader.py"
,
30
),
TestFile
(
"test_multi_tokenizer.py"
,
230
),
TestFile
(
"test_ngram_speculative_decoding.py"
,
250
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
TestFile
(
"test_no_overlap_scheduler.py"
,
234
),
TestFile
(
"test_original_logprobs.py"
,
41
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_priority_scheduling.py"
,
100
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_radix_cache_unit.py"
,
5
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_request_queue_validation.py"
,
30
),
TestFile
(
"test_score_api.py"
,
180
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_standalone_speculative_decoding.py"
,
250
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_swa_unittest.py"
,
1
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
],
"per-commit-2-gpu"
:
[
TestFile
(
"ep/test_moe_ep.py"
,
140
),
TestFile
(
"hicache/test_hicache_storage_file_backend.py"
,
200
),
TestFile
(
"hicache/test_hicache_storage_mooncake_backend.py"
,
400
),
TestFile
(
"hicache/test_hicache_storage_3fs_backend.py"
,
200
),
TestFile
(
"layers/attention/mamba/test_mamba2_mixer.py"
,
110
),
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_disaggregation_basic.py"
,
400
),
TestFile
(
"test_dp_attention.py"
,
594
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_release_memory_occupation.py"
,
257
),
],
"per-commit-4-gpu"
:
[
TestFile
(
"models/test_qwen3_next_models.py"
,
291
),
TestFile
(
"test_disaggregation_dp_attention.py"
,
155
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
300
),
TestFile
(
"test_local_attn.py"
,
411
),
TestFile
(
"test_multi_instance_release_memory_occupation.py"
,
64
),
TestFile
(
"test_pp_single_node.py"
,
481
),
],
"per-commit-8-gpu"
:
[
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-4-gpu-b200"
:
[
# TestFile("test_gpt_oss_4gpu.py", 600),
# TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
],
"per-commit-4-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_small.py"
,
531
),
],
"per-commit-8-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_large.py"
,
338
),
],
"per-commit-8-gpu-h20"
:
[
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
],
"vllm_dependency_test"
:
[
TestFile
(
"quant/test_awq.py"
,
163
),
TestFile
(
"test_bnb.py"
,
5
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
102
),
TestFile
(
"test_vllm_dependency.py"
,
185
),
# TestFile("test_gguf.py", 96),
],
}
# Add AMD tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_amd
=
{
"per-commit-amd"
:
[
TestFile
(
"hicache/test_hicache.py"
,
116
),
TestFile
(
"hicache/test_hicache_mla.py"
,
127
),
TestFile
(
"hicache/test_hicache_storage.py"
,
127
),
TestFile
(
"lora/test_lora.py"
,
200
),
TestFile
(
"lora/test_lora_eviction.py"
,
200
),
TestFile
(
"lora/test_lora_backend.py"
,
99
),
TestFile
(
"lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"lora/test_lora_qwen3.py"
,
97
),
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
TestFile
(
"models/test_compressed_tensors_models.py"
,
42
),
TestFile
(
"models/test_qwen_models.py"
,
82
),
TestFile
(
"models/test_reward_models.py"
,
132
),
TestFile
(
"models/test_transformers_models.py"
,
320
),
TestFile
(
"openai_server/basic/test_protocol.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_chat.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_completions.py"
,
10
),
TestFile
(
"openai_server/basic/test_serving_embedding.py"
,
10
),
TestFile
(
"openai_server/basic/test_openai_embedding.py"
,
141
),
TestFile
(
"openai_server/basic/test_openai_server.py"
,
149
),
TestFile
(
"openai_server/features/test_enable_thinking.py"
,
70
),
TestFile
(
"openai_server/features/test_json_constrained.py"
,
98
),
TestFile
(
"openai_server/features/test_json_mode.py"
,
90
),
TestFile
(
"openai_server/features/test_openai_server_ebnf.py"
,
95
),
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
TestFile
(
"openai_server/features/test_reasoning_content.py"
,
89
),
TestFile
(
"openai_server/function_call/test_openai_function_calling.py"
,
60
),
TestFile
(
"openai_server/function_call/test_tool_choice.py"
,
226
),
TestFile
(
"function_call/test_json_schema_constraint.py"
,
30
),
TestFile
(
"openai_server/validation/test_large_max_new_tokens.py"
,
41
),
TestFile
(
"openai_server/validation/test_matched_stop.py"
,
60
),
TestFile
(
"openai_server/validation/test_openai_server_ignore_eos.py"
,
85
),
TestFile
(
"openai_server/validation/test_request_length_validation.py"
,
31
),
TestFile
(
"quant/test_block_int8.py"
,
22
),
TestFile
(
"quant/test_awq_dequant.py"
,
2
),
TestFile
(
"rl/test_update_weights_from_disk.py"
,
114
),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_chunked_prefill.py"
,
313
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_function_call_parser.py"
,
10
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics_utils.py"
,
1
),
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_no_chunked_prefill.py"
,
108
),
# TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_radix_attention.py"
,
105
),
TestFile
(
"test_regex_constrained.py"
,
64
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_reasoning_parser.py"
,
5
),
TestFile
(
"test_rope_rocm.py"
,
3
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_torch_compile.py"
,
169
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_triton_attention_backend.py"
,
150
),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile
(
"test_wave_attention_kernels.py"
,
2
),
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-amd-mi35x"
:
[
TestFile
(
"test_mla.py"
,
242
),
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
],
"per-commit-2-gpu-amd"
:
[
TestFile
(
"lora/test_lora_tp.py"
,
116
),
TestFile
(
"rl/test_update_weights_from_distributed.py"
,
103
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_load_weights_from_remote_instance.py"
,
72
),
# TestFile("test_patch_torch.py", 19), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-4-gpu-amd"
:
[
TestFile
(
"test_pp_single_node.py"
,
150
),
],
"per-commit-8-gpu-amd"
:
[
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
],
"nightly-amd"
:
[
TestFile
(
"test_nightly_gsm8k_eval_amd.py"
),
],
}
# Add Intel Xeon tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_xeon
=
{
"per-commit-cpu"
:
[
TestFile
(
"cpu/test_activation.py"
),
TestFile
(
"cpu/test_binding.py"
),
TestFile
(
"cpu/test_decode.py"
),
TestFile
(
"cpu/test_extend.py"
),
TestFile
(
"cpu/test_gemm.py"
),
TestFile
(
"cpu/test_mla.py"
),
TestFile
(
"cpu/test_moe.py"
),
TestFile
(
"cpu/test_norm.py"
),
TestFile
(
"cpu/test_qkv_proj_with_rope.py"
),
TestFile
(
"cpu/test_rope.py"
),
TestFile
(
"cpu/test_shared_expert.py"
),
TestFile
(
"cpu/test_topk.py"
),
TestFile
(
"test_intel_amx_attention_backend.py"
),
TestFile
(
"test_cpu_graph.py"
),
],
}
# Add Ascend NPU tests
# NOTE: please sort the test cases alphabetically by the test file name
suite_ascend
=
{
"per-commit-1-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_graph_tp1_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp1_bf16.py"
,
400
),
],
"per-commit-2-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_graph_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_mla_fia_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_bf16.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp2_fia_bf16.py"
,
400
),
],
"per-commit-4-ascend-npu"
:
[
TestFile
(
"ascend/test_ascend_mla_w8a8int8.py"
,
400
),
TestFile
(
"ascend/test_ascend_tp4_bf16.py"
,
400
),
],
"per-commit-16-ascend-a3"
:
[
TestFile
(
"ascend/test_ascend_deepep.py"
,
400
),
],
}
suites
.
update
(
suite_amd
)
suites
.
update
(
suite_xeon
)
suites
.
update
(
suite_ascend
)
suites
=
{}
if
__name__
==
"__main__"
:
...
...
test/srt/run_suite.py
View file @
548a57b1
...
...
@@ -79,7 +79,7 @@ suites = {
TestFile
(
"test_gpt_oss_1gpu.py"
,
600
),
TestFile
(
"test_harmony_parser.py"
,
20
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hybrid_attn_backend.py"
,
100
),
TestFile
(
"test_hybrid_attn_backend.py"
,
379
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_io_struct.py"
,
8
),
TestFile
(
"test_jinja_template_utils.py"
,
1
),
...
...
test/srt/test_mla_int8_deepseek_v3.py
View file @
548a57b1
...
...
@@ -22,7 +22,15 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
other_args
=
[
"--trust-remote-code"
]
if
torch
.
cuda
.
is_available
()
and
torch
.
version
.
cuda
:
other_args
.
extend
([
"--enable-torch-compile"
,
"--cuda-graph-max-bs"
,
"2"
])
other_args
.
extend
(
[
"--cuda-graph-max-bs"
,
"16"
,
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"2"
,
]
)
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
...
...
@@ -50,6 +58,7 @@ class TestMLADeepseekV3ChannelInt8(CustomTestCase):
self
.
assertGreaterEqual
(
metrics
[
"accuracy"
],
0.61
)
@
unittest
.
skipIf
(
is_in_ci
(),
"To reduce the CI execution time."
)
class
TestDeepseekV3MTPChannelInt8
(
CustomTestCase
):
@
classmethod
def
setUpClass
(
cls
):
...
...
@@ -60,12 +69,10 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
other_args
.
extend
(
[
"--cuda-graph-max-bs"
,
"2"
,
"--disable-radix"
,
"16"
,
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"1"
,
"--speculative-algorithm"
,
"2"
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft-model-path"
,
"sgl-project/sglang-ci-dsv3-channel-int8-test-NextN"
,
...
...
@@ -121,7 +128,15 @@ class TestMLADeepseekV3BlockInt8(CustomTestCase):
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
other_args
=
[
"--trust-remote-code"
]
if
torch
.
cuda
.
is_available
()
and
torch
.
version
.
cuda
:
other_args
.
extend
([
"--enable-torch-compile"
,
"--cuda-graph-max-bs"
,
"2"
])
other_args
.
extend
(
[
"--cuda-graph-max-bs"
,
"16"
,
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"2"
,
]
)
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
...
...
@@ -159,12 +174,10 @@ class TestDeepseekV3MTPBlockInt8(CustomTestCase):
other_args
.
extend
(
[
"--cuda-graph-max-bs"
,
"2"
,
"--disable-radix"
,
"16"
,
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"1"
,
"--speculative-algorithm"
,
"2"
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-num-steps"
,
"2"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment