Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
fbd56002
"audio_classification_scripts/run_dropout_sweep.yaml" did not exist on "ca2cd16deb7954e7f5f764e9a6e655f41005b6a1"
Unverified
Commit
fbd56002
authored
Mar 09, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 09, 2025
Browse files
Auto balance CI tests (#4238)
parent
730d084f
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
151 additions
and
84 deletions
+151
-84
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+2
-6
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+12
-3
test/lang/run_suite.py
test/lang/run_suite.py
+9
-1
test/srt/run_suite.py
test/srt/run_suite.py
+126
-73
test/srt/test_custom_allreduce.py
test/srt/test_custom_allreduce.py
+2
-1
No files found.
.github/workflows/pr-test.yml
View file @
fbd56002
...
@@ -95,7 +95,7 @@ jobs:
...
@@ -95,7 +95,7 @@ jobs:
strategy
:
strategy
:
fail-fast
:
false
fail-fast
:
false
matrix
:
matrix
:
range
:
[
0-6
,
6-15
,
15-22
,
22-32
,
32-40
,
40-48
,
48-100
]
part
:
[
0
,
1
,
2
,
3
,
4
,
5
,
6
]
steps
:
steps
:
-
name
:
Checkout code
-
name
:
Checkout code
uses
:
actions/checkout@v3
uses
:
actions/checkout@v3
...
@@ -109,11 +109,8 @@ jobs:
...
@@ -109,11 +109,8 @@ jobs:
-
name
:
Run test
-
name
:
Run test
timeout-minutes
:
30
timeout-minutes
:
30
run
:
|
run
:
|
RANGE=${{ matrix.range }}
range_begin=${RANGE%-*}
range_end=${RANGE#*-}
cd test/srt
cd test/srt
python3 run_suite.py --suite per-commit --
range-begin ${range_begin} --range-end ${range_end}
python3 run_suite.py --suite per-commit --
auto-partition-id ${{ matrix.part }} --auto-partition-size 7
unit-test-backend-2-gpu
:
unit-test-backend-2-gpu
:
needs
:
filter
needs
:
filter
...
@@ -340,7 +337,6 @@ jobs:
...
@@ -340,7 +337,6 @@ jobs:
python3 test_moe_eval_accuracy_large.py
python3 test_moe_eval_accuracy_large.py
finish
:
finish
:
if
:
always()
needs
:
[
needs
:
[
unit-test-frontend
,
unit-test-backend-1-gpu
,
unit-test-backend-2-gpu
,
unit-test-frontend
,
unit-test-backend-1-gpu
,
unit-test-backend-2-gpu
,
performance-test-1-gpu-part-1
,
performance-test-1-gpu-part-2
,
performance-test-2-gpu
,
performance-test-1-gpu-part-1
,
performance-test-1-gpu-part-2
,
performance-test-2-gpu
,
...
...
python/sglang/test/test_utils.py
View file @
fbd56002
...
@@ -446,22 +446,31 @@ def run_with_timeout(
...
@@ -446,22 +446,31 @@ def run_with_timeout(
return
ret_value
[
0
]
return
ret_value
[
0
]
def
run_unittest_files
(
files
:
List
[
str
]
,
timeout_per_file
:
float
):
def
run_unittest_files
(
files
:
List
,
timeout_per_file
:
float
):
tic
=
time
.
time
()
tic
=
time
.
time
()
success
=
True
success
=
True
for
filename
in
files
:
for
file
in
files
:
filename
,
estimated_time
=
file
.
name
,
file
.
estimated_time
process
=
None
process
=
None
def
run_one_file
(
filename
):
def
run_one_file
(
filename
):
nonlocal
process
nonlocal
process
filename
=
os
.
path
.
join
(
os
.
getcwd
(),
filename
)
filename
=
os
.
path
.
join
(
os
.
getcwd
(),
filename
)
print
(
f
"
\n\n
Run:
\n
python3
{
filename
}
\n\n
"
,
flush
=
True
)
print
(
f
".
\n
.
\n
Begin:
\n
python3
{
filename
}
\n
.
\n
.
\n
"
,
flush
=
True
)
tic
=
time
.
time
()
process
=
subprocess
.
Popen
(
process
=
subprocess
.
Popen
(
[
"python3"
,
filename
],
stdout
=
None
,
stderr
=
None
,
env
=
os
.
environ
[
"python3"
,
filename
],
stdout
=
None
,
stderr
=
None
,
env
=
os
.
environ
)
)
process
.
wait
()
process
.
wait
()
elapsed
=
time
.
time
()
-
tic
print
(
f
".
\n
.
\n
End:
\n
{
filename
=
}
,
{
elapsed
=
:.
0
f
}
,
{
estimated_time
=
}
\n
.
\n
.
\n
"
,
flush
=
True
,
)
return
process
.
returncode
return
process
.
returncode
try
:
try
:
...
...
test/lang/run_suite.py
View file @
fbd56002
import
argparse
import
argparse
import
glob
import
glob
from
dataclasses
import
dataclass
from
sglang.test.test_utils
import
run_unittest_files
from
sglang.test.test_utils
import
run_unittest_files
@
dataclass
class
TestFile
:
name
:
str
estimated_time
:
float
=
60
suites
=
{
suites
=
{
"per-commit"
:
[
"per-commit"
:
[
"test_srt_backend.py"
,
TestFile
(
"test_srt_backend.py"
)
,
# Skip this due to some OPENAI_API_KEY issues
# Skip this due to some OPENAI_API_KEY issues
# "test_openai_backend.py",
# "test_openai_backend.py",
],
],
...
...
test/srt/run_suite.py
View file @
fbd56002
import
argparse
import
argparse
import
glob
import
glob
from
dataclasses
import
dataclass
from
sglang.test.test_utils
import
run_unittest_files
from
sglang.test.test_utils
import
run_unittest_files
@
dataclass
class
TestFile
:
name
:
str
estimated_time
:
float
=
60
suites
=
{
suites
=
{
"per-commit"
:
[
"per-commit"
:
[
"models/lora/test_lora.py"
,
TestFile
(
"models/lora/test_lora.py"
,
76
),
"models/lora/test_lora_backend.py"
,
TestFile
(
"models/lora/test_lora_backend.py"
,
420
),
"models/lora/test_multi_lora_backend.py"
,
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
1
),
"models/test_embedding_models.py"
,
TestFile
(
"models/test_embedding_models.py"
,
119
),
"models/test_generation_models.py"
,
TestFile
(
"models/test_generation_models.py"
,
103
),
"models/test_qwen_models.py"
,
TestFile
(
"models/test_qwen_models.py"
,
82
),
"models/test_reward_models.py"
,
TestFile
(
"models/test_reward_models.py"
,
83
),
"test_gptqmodel_dynamic.py"
,
TestFile
(
"test_gptqmodel_dynamic.py"
,
72
),
"models/test_gme_qwen_models.py"
,
TestFile
(
"models/test_gme_qwen_models.py"
,
45
),
"test_abort.py"
,
TestFile
(
"test_abort.py"
,
51
),
"test_chunked_prefill.py"
,
TestFile
(
"test_chunked_prefill.py"
,
336
),
"test_custom_allreduce.py"
,
TestFile
(
"test_custom_allreduce.py"
,
1
),
"test_double_sparsity.py"
,
TestFile
(
"test_double_sparsity.py"
,
50
),
"test_eagle_infer.py"
,
TestFile
(
"test_eagle_infer.py"
,
447
),
"test_embedding_openai_server.py"
,
TestFile
(
"test_embedding_openai_server.py"
,
36
),
"test_eval_accuracy_mini.py"
,
TestFile
(
"test_eval_accuracy_mini.py"
,
63
),
"test_gguf.py"
,
TestFile
(
"test_gguf.py"
,
78
),
"test_input_embeddings.py"
,
TestFile
(
"test_input_embeddings.py"
,
38
),
"test_mla.py"
,
TestFile
(
"test_mla.py"
,
92
),
"test_mla_deepseek_v3.py"
,
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
"test_mla_flashinfer.py"
,
TestFile
(
"test_mla_flashinfer.py"
,
395
),
"test_mla_fp8.py"
,
TestFile
(
"test_mla_fp8.py"
,
93
),
"test_json_constrained.py"
,
TestFile
(
"test_json_constrained.py"
,
98
),
"test_large_max_new_tokens.py"
,
TestFile
(
"test_large_max_new_tokens.py"
,
41
),
"test_metrics.py"
,
TestFile
(
"test_metrics.py"
,
32
),
"test_no_chunked_prefill.py"
,
TestFile
(
"test_no_chunked_prefill.py"
,
126
),
"test_no_overlap_scheduler.py"
,
TestFile
(
"test_no_overlap_scheduler.py"
,
262
),
"test_openai_server.py"
,
TestFile
(
"test_openai_server.py"
,
124
),
"test_penalty.py"
,
TestFile
(
"test_penalty.py"
,
41
),
"test_pytorch_sampling_backend.py"
,
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
"test_radix_attention.py"
,
TestFile
(
"test_radix_attention.py"
,
167
),
"test_regex_constrained.py"
,
TestFile
(
"test_regex_constrained.py"
,
64
),
"test_release_memory_occupation.py"
,
TestFile
(
"test_release_memory_occupation.py"
,
44
),
"test_request_length_validation.py"
,
TestFile
(
"test_request_length_validation.py"
,
31
),
"test_retract_decode.py"
,
TestFile
(
"test_retract_decode.py"
,
54
),
"test_server_args.py"
,
TestFile
(
"test_server_args.py"
,
1
),
# Disabled temporarily
TestFile
(
"test_skip_tokenizer_init.py"
,
72
),
# "test_session_control.py",
TestFile
(
"test_srt_engine.py"
,
237
),
"test_skip_tokenizer_init.py"
,
TestFile
(
"test_srt_endpoint.py"
,
94
),
"test_srt_engine.py"
,
TestFile
(
"test_torch_compile.py"
,
76
),
"test_srt_endpoint.py"
,
TestFile
(
"test_torch_compile_moe.py"
,
85
),
"test_torch_compile.py"
,
TestFile
(
"test_torch_native_attention_backend.py"
,
149
),
"test_torch_compile_moe.py"
,
TestFile
(
"test_torchao.py"
,
70
),
"test_torch_native_attention_backend.py"
,
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
"test_torchao.py"
,
TestFile
(
"test_triton_attention_backend.py"
,
134
),
"test_triton_attention_kernels.py"
,
TestFile
(
"test_hidden_states.py"
,
55
),
"test_triton_attention_backend.py"
,
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
"test_hidden_states.py"
,
TestFile
(
"test_update_weights_from_tensor.py"
,
48
),
"test_update_weights_from_disk.py"
,
TestFile
(
"test_vertex_endpoint.py"
,
31
),
"test_update_weights_from_tensor.py"
,
TestFile
(
"test_vision_chunked_prefill.py"
,
223
),
"test_vertex_endpoint.py"
,
TestFile
(
"test_vision_llm.py"
,
18.4
),
"test_vision_chunked_prefill.py"
,
TestFile
(
"test_vision_openai_server.py"
,
344
),
"test_vision_llm.py"
,
TestFile
(
"test_w8a8_quantization.py"
,
46
),
"test_vision_openai_server.py"
,
TestFile
(
"test_fp8_kernel.py"
,
2
),
"test_w8a8_quantization.py"
,
TestFile
(
"test_block_int8.py"
,
22
),
"test_fp8_kernel.py"
,
TestFile
(
"test_int8_kernel.py"
,
1
),
"test_block_int8.py"
,
TestFile
(
"test_reasoning_content.py"
,
89
),
"test_int8_kernel.py"
,
"test_reasoning_content.py"
,
],
],
"nightly"
:
[
"nightly"
:
[
"test_nightly_gsm8k_eval.py"
,
TestFile
(
"test_nightly_gsm8k_eval.py"
),
# Disable temporarily
# "test_nightly_math_eval.py",
],
],
}
}
# Expand suite
for
target_suite_name
,
target_tests
in
suites
.
items
():
def
auto_partition
(
files
,
rank
,
size
):
for
suite_name
,
tests
in
suites
.
items
():
"""
if
suite_name
==
target_suite_name
:
Partition files into size sublists with approximately equal sums of estimated times
continue
using stable sorting, and return the partition for the specified rank.
if
target_suite_name
in
tests
:
tests
.
remove
(
target_suite_name
)
Args:
tests
.
extend
(
target_tests
)
files (list): List of file objects with estimated_time attribute
rank (int): Index of the partition to return (0 to size-1)
size (int): Number of partitions
Returns:
list: List of file objects in the specified rank's partition
"""
weights
=
[
f
.
estimated_time
for
f
in
files
]
if
not
weights
or
size
<=
0
or
size
>
len
(
weights
):
return
[]
# Create list of (weight, original_index) tuples
# Using negative index as secondary key to maintain original order for equal weights
indexed_weights
=
[(
w
,
-
i
)
for
i
,
w
in
enumerate
(
weights
)]
# Stable sort in descending order by weight
# If weights are equal, larger (negative) index comes first (i.e., earlier original position)
indexed_weights
=
sorted
(
indexed_weights
,
reverse
=
True
)
# Extract original indices (negate back to positive)
indexed_weights
=
[(
w
,
-
i
)
for
w
,
i
in
indexed_weights
]
# Initialize partitions and their sums
partitions
=
[[]
for
_
in
range
(
size
)]
sums
=
[
0.0
]
*
size
# Greedy approach: assign each weight to partition with smallest current sum
for
weight
,
idx
in
indexed_weights
:
# Find partition with minimum sum
min_sum_idx
=
sums
.
index
(
min
(
sums
))
partitions
[
min_sum_idx
].
append
(
idx
)
sums
[
min_sum_idx
]
+=
weight
# Return the files corresponding to the indices in the specified rank's partition
indices
=
partitions
[
rank
]
return
[
files
[
i
]
for
i
in
indices
]
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
arg_parser
=
argparse
.
ArgumentParser
()
arg_parser
=
argparse
.
ArgumentParser
()
...
@@ -108,17 +148,30 @@ if __name__ == "__main__":
...
@@ -108,17 +148,30 @@ if __name__ == "__main__":
default
=
None
,
default
=
None
,
help
=
"The end index of the range of the files to run."
,
help
=
"The end index of the range of the files to run."
,
)
)
arg_parser
.
add_argument
(
"--auto-partition-id"
,
type
=
int
,
help
=
"Use auto load balancing. The part id."
,
)
arg_parser
.
add_argument
(
"--auto-partition-size"
,
type
=
int
,
help
=
"Use auto load balancing. The number of parts."
,
)
args
=
arg_parser
.
parse_args
()
args
=
arg_parser
.
parse_args
()
print
(
f
"
{
args
=
}
"
)
if
args
.
suite
==
"all"
:
if
args
.
suite
==
"all"
:
files
=
glob
.
glob
(
"**/test_*.py"
,
recursive
=
True
)
files
=
glob
.
glob
(
"**/test_*.py"
,
recursive
=
True
)
else
:
else
:
files
=
suites
[
args
.
suite
]
files
=
suites
[
args
.
suite
]
if
args
.
auto_partition_size
:
files
=
auto_partition
(
files
,
args
.
auto_partition_id
,
args
.
auto_partition_size
)
else
:
files
=
files
[
args
.
range_begin
:
args
.
range_end
]
files
=
files
[
args
.
range_begin
:
args
.
range_end
]
print
(
f
"
{
args
=
}
"
)
print
(
"The running tests are "
,
[
f
.
name
for
f
in
files
])
print
(
"The running tests are "
,
files
)
exit_code
=
run_unittest_files
(
files
,
args
.
timeout_per_file
)
exit_code
=
run_unittest_files
(
files
,
args
.
timeout_per_file
)
exit
(
exit_code
)
exit
(
exit_code
)
test/srt/test_custom_allreduce.py
View file @
fbd56002
...
@@ -42,7 +42,8 @@ def multi_process_parallel(
...
@@ -42,7 +42,8 @@ def multi_process_parallel(
# as compared to multiprocessing.
# as compared to multiprocessing.
# NOTE: We need to set working_dir for distributed tests,
# NOTE: We need to set working_dir for distributed tests,
# otherwise we may get import errors on ray workers
# otherwise we may get import errors on ray workers
ray
.
init
(
log_to_driver
=
False
)
ray
.
init
(
log_to_driver
=
True
)
distributed_init_port
=
get_open_port
()
distributed_init_port
=
get_open_port
()
refs
=
[]
refs
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment