Commit e3a53044 (unverified)
Authored Apr 29, 2025 by saienduri; committed by GitHub on Apr 29, 2025

Add AMD MI300x Nightly Testing. (#5861)

Parent: 28b26dbf
Showing 3 changed files, with 249 additions and 0 deletions:

- .github/workflows/nightly-test-amd.yml (+54, -0)
- test/srt/run_suite.py (+3, -0)
- test/srt/test_nightly_gsm8k_eval_amd.py (+192, -0)
.github/workflows/nightly-test-amd.yml (new file, mode 100644)
```yaml
name: Nightly Test (AMD)

on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

concurrency:
  group: nightly-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: linux-mi300-gpu-2
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup docker
        run: |
          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          touch github_summary.md
          docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
            -w /sglang-checkout --name ci_sglang \
            ghcr.io/saienduri/sglang-aiter-v0.1.1:428

      - name: Install dependencies
        run: |
          docker exec ci_sglang pip install --upgrade pip
          docker exec ci_sglang pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
          docker exec ci_sglang pip install -e "python[dev_hip]"
          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
          docker exec -w /human-eval ci_sglang pip install -e .

      - name: Nightly Test
        run: |
          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" ci_sglang python3 run_suite.py --suite nightly-amd --timeout-per-file 7200
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY
```
test/srt/run_suite.py (+3, -0)

```diff
@@ -100,6 +100,9 @@ suites = {
     "nightly": [
         TestFile("test_nightly_gsm8k_eval.py"),
     ],
+    "nightly-amd": [
+        TestFile("test_nightly_gsm8k_eval_amd.py"),
+    ],
     "vllm_dependency_test": [
         TestFile("test_vllm_dependency.py"),
         TestFile("test_awq.py"),
```
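For context, run_suite.py maps suite names to lists of TestFile entries, and the workflow invokes it with `--suite nightly-amd --timeout-per-file 7200`. A minimal sketch of how such a registry is presumably consumed (the TestFile shape and runner loop here are illustrative assumptions, not the actual implementation):

```python
import subprocess
import sys
from dataclasses import dataclass


@dataclass
class TestFile:
    name: str


suites = {
    "nightly-amd": [TestFile("test_nightly_gsm8k_eval_amd.py")],
}


def run_suite(suite, timeout_per_file):
    # Run each test file in its own interpreter with a per-file timeout,
    # failing the suite on the first non-zero exit code.
    for tf in suites[suite]:
        proc = subprocess.run([sys.executable, tf.name], timeout=timeout_per_file)
        if proc.returncode != 0:
            sys.exit(proc.returncode)


if __name__ == "__main__":
    run_suite("nightly-amd", timeout_per_file=7200)
```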
test/srt/test_nightly_gsm8k_eval_amd.py (new file, mode 100644)
```python
import json
import os
import unittest
import warnings
from datetime import datetime
from types import SimpleNamespace

from sglang.srt.utils import kill_process_tree
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    is_in_ci,
    popen_launch_server,
    write_github_step_summary,
)

MODEL_SCORE_THRESHOLDS = {
    "meta-llama/Llama-3.1-8B-Instruct": 0.82,
    "mistralai/Mistral-7B-Instruct-v0.3": 0.56,
    "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
    "meta-llama/Llama-3.1-70B-Instruct": 0.95,
    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
    "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.81,
    "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
    "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
    "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
    "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
}

# Models currently failing on AMD MI300x.
failing_models = {
    "google/gemma-2-27b-it",
    "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
    "neuralmagic/gemma-2-2b-it-FP8",
    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
}


def remove_failing_models(model_str):
    models = model_str.split(",")
    filtered = [m for m in models if m not in failing_models]
    return ",".join(filtered)


DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2
)
```
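A standalone illustration of the filtering above (the model string is made up; this snippet is not part of the commit). Note that `remove_failing_models` compares whole comma-separated entries without stripping whitespace, so it assumes the default model strings contain no spaces around names:

```python
# Illustrative only: mirrors remove_failing_models on a hypothetical model string.
failing = {"google/gemma-2-27b-it"}


def remove_failing(model_str):
    return ",".join(m for m in model_str.split(",") if m not in failing)


print(remove_failing("meta-llama/Llama-3.1-8B-Instruct,google/gemma-2-27b-it"))
# -> meta-llama/Llama-3.1-8B-Instruct
```

The file continues with the server-launch and bookkeeping helpers: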
```python
def parse_models(model_string):
    return [model.strip() for model in model_string.split(",") if model.strip()]


def popen_launch_server_wrapper(base_url, model, is_tp2):
    other_args = ["--log-level-http", "warning", "--trust-remote-code"]
    if is_tp2:
        other_args.extend(["--tp", "2"])

    process = popen_launch_server(
        model,
        base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=other_args,
    )
    return process


def write_results_to_json(model, metrics, mode="a"):
    result = {
        "timestamp": datetime.now().isoformat(),
        "model": model,
        "metrics": metrics,
        "score": metrics["score"],
    }

    existing_results = []
    if mode == "a" and os.path.exists("results.json"):
        try:
            with open("results.json", "r") as f:
                existing_results = json.load(f)
        except json.JSONDecodeError:
            existing_results = []

    if isinstance(existing_results, list):
        existing_results.append(result)
    else:
        existing_results = [result]

    with open("results.json", "w") as f:
        json.dump(existing_results, f, indent=2)
```
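The `mode` argument follows file-open semantics: the first result of a run is written with "w" to start a fresh results.json, and later results append with "a". A quick standalone check of that behavior (hypothetical model names and scores, assuming the definitions above are importable or pasted into the same session):

```python
import json
import os
import tempfile

os.chdir(tempfile.mkdtemp())  # scratch directory so a real results.json is not clobbered
write_results_to_json("model-a", {"score": 0.50}, "w")  # start fresh
write_results_to_json("model-b", {"score": 0.70}, "a")  # append
with open("results.json") as f:
    print([r["model"] for r in json.load(f)])  # ['model-a', 'model-b']
```

The file closes with the score check and the test class itself: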
```python
def check_model_scores(results):
    failed_models = []
    summary = " | model | score | threshold |\n"
    summary += "| ----- | ----- | --------- |\n"

    for model, score in results:
        threshold = MODEL_SCORE_THRESHOLDS.get(model)
        if threshold is None:
            print(f"Warning: No threshold defined for model {model}")
            continue

        if score < threshold:
            failed_models.append(
                f"\nScore Check Failed: {model}\n"
                f"Model {model} score ({score:.4f}) is below threshold ({threshold:.4f})"
            )

        line = f"| {model} | {score} | {threshold} |\n"
        summary += line

    print(summary)

    if is_in_ci():
        write_github_step_summary(f"### TestNightlyGsm8KEval\n{summary}")

    if failed_models:
        raise AssertionError("\n".join(failed_models))


# Do not use `CustomTestCase` since `test_mgsm_en_all_models` does not want retry
class TestNightlyGsm8KEval(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model_groups = [
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1), False, False),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True),
        ]
        cls.base_url = DEFAULT_URL_FOR_TEST

    def test_mgsm_en_all_models(self):
        warnings.filterwarnings(
            "ignore", category=ResourceWarning, message="unclosed.*socket"
        )
        is_first = True
        all_results = []

        for model_group, is_fp8, is_tp2 in self.model_groups:
            for model in model_group:
                with self.subTest(model=model):
                    process = popen_launch_server_wrapper(self.base_url, model, is_tp2)

                    args = SimpleNamespace(
                        base_url=self.base_url,
                        model=model,
                        eval_name="mgsm_en",
                        num_examples=None,
                        num_threads=1024,
                    )
                    metrics = run_eval(args)
                    print(
                        f"{'=' * 42}\n{model} - metrics={metrics} score={metrics['score']}\n{'=' * 42}\n"
                    )

                    write_results_to_json(model, metrics, "w" if is_first else "a")
                    is_first = False

                    all_results.append((model, metrics["score"]))
                    kill_process_tree(process.pid)

        try:
            with open("results.json", "r") as f:
                print("\nFinal Results from results.json:")
                print(json.dumps(json.load(f), indent=2))
        except Exception as e:
            print(f"Error reading results.json: {e}")

        # Check all scores after collecting all results
        check_model_scores(all_results)


if __name__ == "__main__":
    unittest.main()
```
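To reproduce what CI runs, the workflow's final step is the reference invocation: from test/srt, run `python3 run_suite.py --suite nightly-amd --timeout-per-file 7200` with SGLANG_IS_IN_CI=1 set so the step summary is written; `python3 test_nightly_gsm8k_eval_amd.py` drives the same unittest entry point directly. Either way, a ROCm build of sglang and a valid HF_TOKEN (checkpoints such as the Llama models are gated) are assumed.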