Add AMD MI300x Nightly Testing. (#5861)

Commit e3a53044 (unverified), authored Apr 29, 2025 by saienduri and committed by GitHub on Apr 29, 2025. Parent commit: 28b26dbf.
Showing 3 changed files with 249 additions and 0 deletions:

- .github/workflows/nightly-test-amd.yml (+54, -0)
- test/srt/run_suite.py (+3, -0)
- test/srt/test_nightly_gsm8k_eval_amd.py (+192, -0)
.github/workflows/nightly-test-amd.yml (new file, mode 100644)
```yaml
name: Nightly Test (AMD)

on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

concurrency:
  group: nightly-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: linux-mi300-gpu-2
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup docker
        run: |
          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi

          touch github_summary.md
          docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
            -w /sglang-checkout --name ci_sglang \
            ghcr.io/saienduri/sglang-aiter-v0.1.1:428

      - name: Install dependencies
        run: |
          docker exec ci_sglang pip install --upgrade pip
          docker exec ci_sglang pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
          docker exec ci_sglang pip install -e "python[dev_hip]"

          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
          docker exec -w /human-eval ci_sglang pip install -e .

      - name: Nightly Test
        run: |
          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" ci_sglang python3 run_suite.py --suite nightly-amd --timeout-per-file 7200
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY
```
test/srt/run_suite.py
```diff
@@ -100,6 +100,9 @@ suites = {
     "nightly": [
         TestFile("test_nightly_gsm8k_eval.py"),
     ],
+    "nightly-amd": [
+        TestFile("test_nightly_gsm8k_eval_amd.py"),
+    ],
     "vllm_dependency_test": [
         TestFile("test_vllm_dependency.py"),
         TestFile("test_awq.py"),
```
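The new `nightly-amd` entry is what the workflow's `--suite nightly-amd` flag selects. As a rough illustration of how such a suite table is consumed, the sketch below runs each listed file as its own process with a per-file timeout; the `TestFile` dataclass, flag names, and timeout handling here are simplified assumptions, not the actual `run_suite.py` implementation.

```python
# Illustrative sketch only; the real sglang run_suite.py differs in detail.
import argparse
import subprocess
from dataclasses import dataclass


@dataclass
class TestFile:
    name: str


suites = {
    "nightly": [TestFile("test_nightly_gsm8k_eval.py")],
    "nightly-amd": [TestFile("test_nightly_gsm8k_eval_amd.py")],
}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--suite", required=True, choices=sorted(suites))
    parser.add_argument("--timeout-per-file", type=int, default=3600)
    args = parser.parse_args()

    for test_file in suites[args.suite]:
        # Each test file runs in its own process, bounded by the per-file timeout.
        subprocess.run(
            ["python3", test_file.name],
            timeout=args.timeout_per_file,
            check=True,
        )


if __name__ == "__main__":
    main()
```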
test/srt/test_nightly_gsm8k_eval_amd.py (new file, mode 100644)
```python
import json
import os
import unittest
import warnings
from datetime import datetime
from types import SimpleNamespace

from sglang.srt.utils import kill_process_tree
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1,
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    is_in_ci,
    popen_launch_server,
    write_github_step_summary,
)

MODEL_SCORE_THRESHOLDS = {
    "meta-llama/Llama-3.1-8B-Instruct": 0.82,
    "mistralai/Mistral-7B-Instruct-v0.3": 0.56,
    "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
    "meta-llama/Llama-3.1-70B-Instruct": 0.95,
    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
    "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.81,
    "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
    "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
    "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
    "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
}

# Models currently failing on AMD MI300x.
failing_models = {
    "google/gemma-2-27b-it",
    "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
    "neuralmagic/gemma-2-2b-it-FP8",
    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
}


def remove_failing_models(model_str):
    models = model_str.split(",")
    filtered = [m for m in models if m not in failing_models]
    return ",".join(filtered)


DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1
)
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = remove_failing_models(
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2
)


def parse_models(model_string):
    return [model.strip() for model in model_string.split(",") if model.strip()]


def popen_launch_server_wrapper(base_url, model, is_tp2):
    other_args = ["--log-level-http", "warning", "--trust-remote-code"]
    if is_tp2:
        other_args.extend(["--tp", "2"])

    process = popen_launch_server(
        model,
        base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=other_args,
    )
    return process


def write_results_to_json(model, metrics, mode="a"):
    result = {
        "timestamp": datetime.now().isoformat(),
        "model": model,
        "metrics": metrics,
        "score": metrics["score"],
    }

    existing_results = []
    if mode == "a" and os.path.exists("results.json"):
        try:
            with open("results.json", "r") as f:
                existing_results = json.load(f)
        except json.JSONDecodeError:
            existing_results = []

    if isinstance(existing_results, list):
        existing_results.append(result)
    else:
        existing_results = [result]

    with open("results.json", "w") as f:
        json.dump(existing_results, f, indent=2)


def check_model_scores(results):
    failed_models = []
    summary = " | model | score | threshold |\n"
    summary += "| ----- | ----- | --------- |\n"

    for model, score in results:
        threshold = MODEL_SCORE_THRESHOLDS.get(model)
        if threshold is None:
            print(f"Warning: No threshold defined for model {model}")
            continue

        if score < threshold:
            failed_models.append(
                f"\nScore Check Failed: {model}\n"
                f"Model {model} score ({score:.4f}) is below threshold ({threshold:.4f})"
            )

        line = f"| {model} | {score} | {threshold} |\n"
        summary += line

    print(summary)

    if is_in_ci():
        write_github_step_summary(f"### TestNightlyGsm8KEval\n{summary}")

    if failed_models:
        raise AssertionError("\n".join(failed_models))


# Do not use `CustomTestCase` since `test_mgsm_en_all_models` does not want retry
class TestNightlyGsm8KEval(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model_groups = [
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1), False, False),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False),
            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True),
        ]
        cls.base_url = DEFAULT_URL_FOR_TEST

    def test_mgsm_en_all_models(self):
        warnings.filterwarnings(
            "ignore", category=ResourceWarning, message="unclosed.*socket"
        )
        is_first = True
        all_results = []

        for model_group, is_fp8, is_tp2 in self.model_groups:
            for model in model_group:
                with self.subTest(model=model):
                    process = popen_launch_server_wrapper(self.base_url, model, is_tp2)

                    args = SimpleNamespace(
                        base_url=self.base_url,
                        model=model,
                        eval_name="mgsm_en",
                        num_examples=None,
                        num_threads=1024,
                    )
                    metrics = run_eval(args)
                    print(
                        f"{'=' * 42}\n{model} - metrics={metrics} score={metrics['score']}\n{'=' * 42}\n"
                    )

                    write_results_to_json(model, metrics, "w" if is_first else "a")
                    is_first = False

                    all_results.append((model, metrics["score"]))
                    kill_process_tree(process.pid)

        try:
            with open("results.json", "r") as f:
                print("\nFinal Results from results.json:")
                print(json.dumps(json.load(f), indent=2))
        except Exception as e:
            print(f"Error reading results.json: {e}")

        # Check all scores after collecting all results
        check_model_scores(all_results)


if __name__ == "__main__":
    unittest.main()
```
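Outside the nightly workflow, the file can also be exercised on its own through its `unittest.main()` entry point. The snippet below is a hypothetical local invocation (assuming a ROCm-capable host with the `dev_hip` extras installed and Hugging Face access to the listed models), not something this commit adds.

```python
# Hypothetical local run (not part of this commit): execute only the AMD
# nightly GSM8K eval via the file's unittest entry point.
import subprocess

subprocess.run(
    ["python3", "test_nightly_gsm8k_eval_amd.py"],
    cwd="test/srt",  # the test writes results.json to its working directory, as in CI
    check=True,
)
```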