Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3c4e0ee6
"vscode:/vscode.git/clone" did not exist on "e61138be19cce30b1cf9e16dd8c35bbdbb86530d"
Unverified
Commit
3c4e0ee6
authored
Apr 27, 2025
by
Lianmin Zheng
Committed by
GitHub
Apr 27, 2025
Browse files
[CI] Tune threshold (#5787)
parent
9c088829
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
5 deletions
+5
-5
test/srt/run_suite.py
test/srt/run_suite.py
+3
-3
test/srt/test_bench_one_batch.py
test/srt/test_bench_one_batch.py
+1
-1
test/srt/test_torch_native_attention_backend.py
test/srt/test_torch_native_attention_backend.py
+1
-1
No files found.
test/srt/run_suite.py
View file @
3c4e0ee6
...
@@ -82,10 +82,10 @@ suites = {
...
@@ -82,10 +82,10 @@ suites = {
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
],
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
30
0
),
TestFile
(
"models/lora/test_lora_tp.py"
,
15
0
),
TestFile
(
"test_data_parallelism.py"
,
90
),
TestFile
(
"test_data_parallelism.py"
,
90
),
TestFile
(
"test_dp_attention.py"
,
9
0
),
TestFile
(
"test_dp_attention.py"
,
15
0
),
TestFile
(
"test_mla_tp.py"
,
420
),
TestFile
(
"test_mla_tp.py"
,
174
),
TestFile
(
"test_moe_ep.py"
,
220
),
TestFile
(
"test_moe_ep.py"
,
220
),
TestFile
(
"test_patch_torch.py"
,
30
),
TestFile
(
"test_patch_torch.py"
,
30
),
TestFile
(
"test_update_weights_from_distributed.py"
,
100
),
TestFile
(
"test_update_weights_from_distributed.py"
,
100
),
...
...
test/srt/test_bench_one_batch.py
View file @
3c4e0ee6
...
@@ -51,7 +51,7 @@ class TestBenchOneBatch(CustomTestCase):
...
@@ -51,7 +51,7 @@ class TestBenchOneBatch(CustomTestCase):
f
"### test_torch_compile_tp2_bs1
\n
"
f
"### test_torch_compile_tp2_bs1
\n
"
f
"output_throughput :
{
output_throughput
:.
2
f
}
token/s
\n
"
f
"output_throughput :
{
output_throughput
:.
2
f
}
token/s
\n
"
)
)
self
.
assertGreater
(
output_throughput
,
2
3
5
)
self
.
assertGreater
(
output_throughput
,
2
2
5
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
test/srt/test_torch_native_attention_backend.py
View file @
3c4e0ee6
...
@@ -28,7 +28,7 @@ class TestTorchNativeAttnBackend(CustomTestCase):
...
@@ -28,7 +28,7 @@ class TestTorchNativeAttnBackend(CustomTestCase):
if
is_in_ci
():
if
is_in_ci
():
# Torch native backend is expected to be slower
# Torch native backend is expected to be slower
assert
output_throughput
>
50
,
f
"
{
output_throughput
=
}
"
self
.
assert
Greater
(
output_throughput
,
40
)
def
test_mmlu
(
self
):
def
test_mmlu
(
self
):
model
=
DEFAULT_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MODEL_NAME_FOR_TEST
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment