Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2afba1b1
Unverified
Commit
2afba1b1
authored
Apr 30, 2025
by
Sai Enduri
Committed by
GitHub
Apr 30, 2025
Browse files
Add TP2 MOE benchmarks for AMD. (#5909)
parent
e330f2b8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
4 deletions
+35
-4
.github/workflows/pr-test-amd.yml
.github/workflows/pr-test-amd.yml
+21
-1
test/srt/test_bench_one_batch.py
test/srt/test_bench_one_batch.py
+5
-1
test/srt/test_bench_serving.py
test/srt/test_bench_serving.py
+9
-2
No files found.
.github/workflows/pr-test-amd.yml
View file @
2afba1b1
...
@@ -141,11 +141,31 @@ jobs:
...
@@ -141,11 +141,31 @@ jobs:
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
docker cp ./dummy-grok ci_sglang:/
docker cp ./dummy-grok ci_sglang:/
-
name
:
Evaluate
Benchmark
-
name
:
Benchmark
dummy grok (TP=2)
timeout-minutes
:
20
timeout-minutes
:
20
run
:
|
run
:
|
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_dummy_grok_models.py
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_dummy_grok_models.py
-
name
:
Benchmark single latency (TP=2)
timeout-minutes
:
20
run
:
|
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
-
name
:
Benchmark single latency + torch.compile (TP=2)
timeout-minutes
:
20
run
:
|
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
-
name
:
Benchmark offline throughput (TP=2)
timeout-minutes
:
20
run
:
|
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
-
name
:
Benchmark offline throughput (w/o RadixAttention) (TP=2)
timeout-minutes
:
20
run
:
|
docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
finish
:
finish
:
if
:
always()
if
:
always()
needs
:
[
needs
:
[
...
...
test/srt/test_bench_one_batch.py
View file @
2afba1b1
import
os
import
unittest
import
unittest
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
...
@@ -45,7 +46,10 @@ class TestBenchOneBatch(CustomTestCase):
...
@@ -45,7 +46,10 @@ class TestBenchOneBatch(CustomTestCase):
f
"### test_moe_tp2_bs1 (Mixtral-8x7B)
\n
"
f
"### test_moe_tp2_bs1 (Mixtral-8x7B)
\n
"
f
"output_throughput:
{
output_throughput
:.
2
f
}
token/s
\n
"
f
"output_throughput:
{
output_throughput
:.
2
f
}
token/s
\n
"
)
)
self
.
assertGreater
(
output_throughput
,
125
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
output_throughput
,
85
)
else
:
self
.
assertGreater
(
output_throughput
,
125
)
def
test_torch_compile_tp2_bs1
(
self
):
def
test_torch_compile_tp2_bs1
(
self
):
output_throughput
=
run_bench_offline_throughput
(
output_throughput
=
run_bench_offline_throughput
(
...
...
test/srt/test_bench_serving.py
View file @
2afba1b1
import
os
import
unittest
import
unittest
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
...
@@ -180,7 +181,10 @@ class TestBenchServing(CustomTestCase):
...
@@ -180,7 +181,10 @@ class TestBenchServing(CustomTestCase):
f
"### test_moe_offline_throughput_default
\n
"
f
"### test_moe_offline_throughput_default
\n
"
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
)
)
self
.
assertGreater
(
res
[
"output_throughput"
],
2200
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
res
[
"output_throughput"
],
2100
)
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
2200
)
def
test_moe_offline_throughput_without_radix_cache
(
self
):
def
test_moe_offline_throughput_without_radix_cache
(
self
):
res
=
run_bench_serving
(
res
=
run_bench_serving
(
...
@@ -195,7 +199,10 @@ class TestBenchServing(CustomTestCase):
...
@@ -195,7 +199,10 @@ class TestBenchServing(CustomTestCase):
f
"### test_moe_offline_throughput_without_radix_cache
\n
"
f
"### test_moe_offline_throughput_without_radix_cache
\n
"
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
)
)
self
.
assertGreater
(
res
[
"output_throughput"
],
2200
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
res
[
"output_throughput"
],
2100
)
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
2200
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment