change / sglang / Commits / 91b3555d

Unverified commit 91b3555d, authored Sep 10, 2025 by Hubert Lu, committed by GitHub on Sep 10, 2025.

    Add tests to AMD CI for MI35x (#9662)

    Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>

Parent: 9e2f7252

Showing 7 changed files with 147 additions and 110 deletions (+147 -110).
Files changed:

    .github/workflows/pr-test-amd.yml           +25  -16
    python/sglang/srt/models/deepseek_v2.py      +9   -2
    scripts/ci/amd_ci_exec.sh                   +17   -0
    scripts/ci/amd_ci_install_dependency.sh     +30   -5
    scripts/ci/amd_ci_start_container.sh        +57  -85
    test/srt/run_suite.py                        +4   -0
    test/srt/test_gpt_oss_common.py              +5   -2
.github/workflows/pr-test-amd.yml (view file @ 91b3555d)

@@ -28,6 +28,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -54,8 +55,9 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -70,7 +72,7 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh

       - name: Evaluate accuracy (TP=2)
-        timeout-minutes: 30
+        timeout-minutes: 60
         run: |
           bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
@@ -78,6 +80,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -102,6 +105,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -142,6 +146,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -176,6 +181,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
     runs-on: ${{matrix.runner}}
@@ -242,12 +248,13 @@ jobs:
         run: |
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8

-  unit-test-backend-2-gpu-amd:
+  unit-test-backend-1-gpu-amd-mi35x:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+        runner: [linux-mi35x-gpu-1]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -262,16 +269,17 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh

       - name: Run test
-        timeout-minutes: 40
+        timeout-minutes: 50
         run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x

-  unit-test-backend-8-gpu-amd:
+  unit-test-backend-2-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-8]
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -286,14 +294,15 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh

       - name: Run test
-        timeout-minutes: 60
+        timeout-minutes: 40
         run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd

-  unit-test-backend-8-gpu-CAR-amd:
+  unit-test-backend-8-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-8]
     runs-on: ${{matrix.runner}}
@@ -309,10 +318,10 @@ jobs:
       - name: Install dependencies
         run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: Run CustomAllReduce test
-        timeout-minutes: 20
+      - name: Run test
+        timeout-minutes: 60
         run: |
-          bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600

   unit-test-sgl-kernel-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -350,8 +359,8 @@ jobs:
     needs: [
       accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
       accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
-      unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd,
-      unit-test-sgl-kernel-amd
+      unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
+      unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
     ]
     runs-on: ubuntu-latest
     steps:
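Every job above funnels its test command through scripts/ci/amd_ci_exec.sh, which accepts -e ENV=VAL overrides ahead of the command. A minimal sketch of reproducing the accuracy step by hand, assuming the ci_sglang container and its dependencies have already been set up by the other CI scripts:

    # Same command the "Evaluate accuracy (TP=2)" step runs; -e overrides an env var
    # inside the container before the test is executed.
    bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py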
python/sglang/srt/models/deepseek_v2.py (view file @ 91b3555d)

@@ -2027,7 +2027,10 @@ class DeepseekV2DecoderLayer(nn.Module):
         quant_format = (
             "mxfp4"
             if _is_gfx95_supported
-            and self.self_attn.fused_qkv_a_proj_with_mqa.weight == torch.uint8
+            and getattr(self.self_attn, "fused_qkv_a_proj_with_mqa", None) is not None
+            and getattr(self.self_attn.fused_qkv_a_proj_with_mqa, "weight", None) is not None
+            and self.self_attn.fused_qkv_a_proj_with_mqa.weight.dtype == torch.uint8
             else ""
         )
@@ -2582,7 +2585,11 @@ class DeepseekV2ForCausalLM(nn.Module):
                     0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
                 ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)

-                if _use_aiter_gfx95 and self.quant_config.get_name() == "quark":
+                if (
+                    _use_aiter_gfx95
+                    and self.quant_config is not None
+                    and self.quant_config.get_name() == "quark"
+                ):
                     w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = (
                         quark_post_load_weights(self_attn, w, "mxfp4")
                     )
scripts/ci/amd_ci_exec.sh (view file @ 91b3555d)

 #!/bin/bash
 set -euo pipefail

+# Detect GPU family from hostname (e.g., linux-mi35x-gpu-1-xxxxx-runner-zzzzz)
+HOSTNAME_VALUE=$(hostname)
+GPU_FAMILY=""
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+  GPU_FAMILY="${BASH_REMATCH[1]}"
+  echo "Detected GPU family from hostname: ${GPU_FAMILY}"
+else
+  echo "Warning: could not parse GPU family from '${HOSTNAME_VALUE}'"
+fi
+
 WORKDIR="/sglang-checkout/test/srt"

 declare -A ENV_MAP=(
   [SGLANG_AMD_CI]=1
@@ -8,6 +20,11 @@ declare -A ENV_MAP=(
   [SGLANG_USE_AITER]=1
 )

+# Conditionally add GPU_ARCHS only for mi35x
+if [[ "${GPU_FAMILY}" == "mi35x" ]]; then
+  ENV_MAP[GPU_ARCHS]="gfx950"
+fi
+
 # Parse -w/--workdir and -e ENV=VAL
 while [[ $# -gt 0 ]]; do
   case "$1" in
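The new hostname parsing is what gates the GPU_ARCHS=gfx950 export: the regex captures the mi-family token from runner hostnames of the form linux-<family>-gpu-<n>-... A small standalone check of that pattern, using illustrative hostnames only:

    # Hypothetical hostnames following the documented runner naming convention.
    for h in linux-mi300-gpu-2-abc12-runner-de34f linux-mi35x-gpu-1-abc12-runner-de34f; do
      if [[ "$h" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
        echo "$h -> ${BASH_REMATCH[1]}"   # -> mi300, then mi35x
      fi
    done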
scripts/ci/amd_ci_install_dependency.sh (view file @ 91b3555d)

 #!/bin/bash
 set -euo pipefail

+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x" # default
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+  GPU_ARCH="${BASH_REMATCH[1]}"
+  echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+  echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
+
 # Install the required dependencies in CI.
 docker exec ci_sglang pip install --upgrade pip
 docker exec ci_sglang pip uninstall sgl-kernel -y || true
 docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
-docker exec ci_sglang pip install -e "python[dev_hip]"
+
+case "${GPU_ARCH}" in
+  mi35x)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+    docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps # TODO: only for mi35x
+    # For lmms_evals evaluating MMMU
+    docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+    docker exec -w /lmms-eval ci_sglang pip install -e . --no-deps # TODO: only for mi35x
+    ;;
+  mi30x|mi300|mi325)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+    docker exec ci_sglang pip install -e "python[dev_hip]"
+    # For lmms_evals evaluating MMMU
+    docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+    docker exec -w /lmms-eval ci_sglang pip install -e .
+    ;;
+  *)
+    echo "Runner architecture '${GPU_ARCH}' unrecognised;" >&2
+    ;;
+esac

 docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
 docker exec -w /human-eval ci_sglang pip install -e .

-# For lmms_evals evaluating MMMU
-docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
-docker exec -w /lmms-eval ci_sglang pip install -e .
-
 docker exec -w / ci_sglang mkdir -p /dummy-grok
 mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
 docker cp ./dummy-grok ci_sglang:/
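The case statement keys the editable install off the detected architecture: on mi35x runners the sglang and lmms-eval installs add --no-deps, while mi30x-class runners keep the previous behaviour. A dry-run sketch of just the branch selection (echo only, no docker calls; the mi35x value stands in for whatever the hostname parse returns):

    GPU_ARCH="mi35x"   # stand-in for the value parsed from the runner hostname
    case "${GPU_ARCH}" in
      mi35x)             echo 'pip install -e "python[dev_hip]" --no-deps' ;;
      mi30x|mi300|mi325) echo 'pip install -e "python[dev_hip]"' ;;
      *)                 echo "unrecognised arch: ${GPU_ARCH}" >&2 ;;
    esac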
scripts/ci/amd_ci_start_container.sh (view file @ 91b3555d)

@@ -3,7 +3,7 @@ set -euo pipefail
 # Get version from SGLang version.py file
 SGLANG_VERSION_FILE="$(dirname "$0")/../../python/sglang/version.py"
 SGLANG_VERSION="v0.5.0rc0" # Default version, will be overridden if version.py is found
 if [ -f "$SGLANG_VERSION_FILE" ]; then
     VERSION_FROM_FILE=$(python3 -c '
@@ -25,130 +25,102 @@ else
     echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2
 fi

 # Default base tags (can be overridden by command line arguments)
 DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
 DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"

 # Parse command line arguments
-MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG"
-MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG"
+MI30X_BASE_TAG="${DEFAULT_MI30X_BASE_TAG}"
+MI35X_BASE_TAG="${DEFAULT_MI35X_BASE_TAG}"

 while [[ $# -gt 0 ]]; do
     case $1 in
         --mi30x-base-tag)
-            MI30X_BASE_TAG="$2"; shift 2;;
-        --mi35x-base-tag)
-            MI35X_BASE_TAG="$2"; shift 2;;
+            MI30X_BASE_TAG="$2"
+            shift 2
+            ;;
+        --mi35x-base-tag)
+            MI35X_BASE_TAG="$2"
+            shift 2
+            ;;
         -h|--help)
             echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
+            echo "  --mi30x-base-tag TAG    Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)"
+            echo "  --mi35x-base-tag TAG    Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)"
             exit 0
             ;;
         *)
-            echo "Unknown option $1"; exit 1;;
+            echo "Unknown option $1"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
     esac
 done

+# Detect GPU architecture from the Kubernetes runner hostname
+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x" # default
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+    GPU_ARCH="${BASH_REMATCH[1]}"
+    echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+    echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
+
+# Normalise / collapse architectures we don’t yet build specifically for
+case "${GPU_ARCH}" in
+    mi35x)
+        echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+        ;;
+    mi30x|mi300|mi325)
+        echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+        GPU_ARCH="mi30x"
+        ;;
+    *)
+        echo "Runner architecture '${GPU_ARCH}' unrecognised; defaulting to mi30x image." >&2
+        GPU_ARCH="mi30x"
+        ;;
+esac

 # Set up DEVICE_FLAG based on Kubernetes pod info
-if [ -f "/etc/podinfo/gha-render-devices" ]; then
+if [[ -f /etc/podinfo/gha-render-devices ]]; then
     DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
 else
     DEVICE_FLAG="--device /dev/dri"
 fi

-# Find the latest image
+# Function to find latest available image for a given GPU architecture
 find_latest_image() {
     local gpu_arch=$1
-    local base_tag
-    if [ "$gpu_arch" == "mi30x" ]; then
-        base_tag="$MI30X_BASE_TAG"
-    elif [ "$gpu_arch" == "mi35x" ]; then
-        base_tag="$MI35X_BASE_TAG"
-    else
-        echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2
-        return 1
-    fi
-
-    local days_back=0
-    while [ $days_back -lt 7 ]; do
-        local check_date=$(date -d "$days_back days ago" +%Y%m%d)
-        local image_tag="${base_tag}-${check_date}"
+    local base_tag days_back image_tag
+
+    case "${gpu_arch}" in
+        mi30x) base_tag="${MI30X_BASE_TAG}";;
+        mi35x) base_tag="${MI35X_BASE_TAG}";;
+        *) echo "Error: unsupported GPU architecture '${gpu_arch}'" >&2; return 1;;
+    esac
+
+    for days_back in {0..6}; do
+        image_tag="${base_tag}-$(date -d "${days_back} days ago" +%Y%m%d)"
         echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2

-        # Check if the image exists by trying to get its manifest
         if docker manifest inspect "rocm/sgl-dev:${image_tag}" > /dev/null 2>&1; then
             echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
             echo "rocm/sgl-dev:${image_tag}"
             return 0
         fi
-
-        days_back=$((days_back + 1))
     done

-    echo "Error: No ${gpu_arch} image found in the last 7 days for version ${base_tag}" >&2
-    echo "Using final fallback images..." >&2
-    # Final fallback to specific hardcoded images
-    if [ "$gpu_arch" == "mi30x" ]; then
-        echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
-    elif [ "$gpu_arch" == "mi35x" ]; then
-        echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
-    else
-        echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" # Default to mi30x
-    fi
+    echo "Error: no ${gpu_arch} image found in the last 7 days for base ${base_tag}" >&2
+    echo "Using hard-coded fallback…" >&2
+    if [[ "${gpu_arch}" == "mi35x" ]]; then
+        echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
+    else
+        echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
+    fi
+    return 0
 }

-# Determine image finder and fallback based on runner
-# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb)
-# Extract the GPU type from hostname
-HOSTNAME_VALUE=$(hostname)
-RUNNER_NAME="unknown"
-if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then
-    RUNNER_NAME="${BASH_REMATCH[1]}"
-    echo "Extracted runner from hostname: ${RUNNER_NAME}"
-else
-    echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}"
-fi
-echo "The runner is: ${RUNNER_NAME}"
-
-GPU_ARCH="mi30x"
-# Check for mi350/mi355 runners
-if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then
-    echo "Runner is ${RUNNER_NAME}, will find mi35x image."
-    GPU_ARCH="mi35x"
-# Check for mi300/mi325 runners
-elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then
-    echo "Runner is ${RUNNER_NAME}, will find mi30x image."
-else
-    echo "Runner type not recognized: '${RUNNER_NAME}'"
-    echo "Defaulting to find mi30x image"
-fi
-
-# Find and pull the latest image
+# Pull and run the latest image
 IMAGE=$(find_latest_image "${GPU_ARCH}")
-echo "Pulling Docker image: $IMAGE"
-docker pull "$IMAGE"
+echo "Pulling Docker image: ${IMAGE}"
+docker pull "${IMAGE}"

-echo "Launching container: ci_sglang"
-docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+# Run the container
+echo "Starting container: ci_sglang"
+docker run -dt --user root --device=/dev/kfd ${DEVICE_FLAG} \
     -v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
     --ipc=host --group-add video \
     --shm-size 32g \
@@ -157,4 +129,4 @@ docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
     --security-opt seccomp=unconfined \
     -w /sglang-checkout \
     --name ci_sglang \
-    "$IMAGE"
+    "${IMAGE}"
test/srt/run_suite.py (view file @ 91b3555d)

@@ -243,6 +243,10 @@ suite_amd = {
         TestFile("test_wave_attention_kernels.py", 2),
         TestFile("test_wave_attention_backend.py", 150),
     ],
+    "per-commit-amd-mi35x": [
+        TestFile("test_mla.py", 242),
+        TestFile("test_gpt_oss_1gpu.py", 600),
+    ],
     "per-commit-2-gpu-amd": [
         TestFile("lora/test_lora_tp.py", 116),
         TestFile("rl/test_update_weights_from_distributed.py", 103),
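The new per-commit-amd-mi35x suite is what the unit-test-backend-1-gpu-amd-mi35x job runs; inside the CI container it goes through the exec wrapper, and the same command works for a manual run:

    bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x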
test/srt/test_gpt_oss_common.py (view file @ 91b3555d)

+import os
 from concurrent.futures import ThreadPoolExecutor
 from types import SimpleNamespace
 from typing import Dict, List, Literal, Optional

-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_hip, kill_process_tree
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -14,6 +15,7 @@ from sglang.test.test_utils import (
 )

 _base_url = DEFAULT_URL_FOR_TEST
+_is_hip = is_hip()


 class BaseTestGptOss(CustomTestCase):
@@ -36,7 +38,8 @@ class BaseTestGptOss(CustomTestCase):
         if model_variant == "20b":
             other_args += ["--cuda-graph-max-bs", "600"]

+        if _is_hip:
+            os.environ["SGLANG_USE_AITER"] = "0"
         self._run_test_raw(
             model=model,
             expected_score_of_reasoning_effort=expected_score_of_reasoning_effort,
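With this change the GPT-OSS base test disables AITER itself whenever is_hip() reports a ROCm build. An equivalent manual override when running a suite entry outside the CI wrapper (a sketch; test_gpt_oss_1gpu.py from the new mi35x suite is assumed to build on this base class):

    cd test/srt
    SGLANG_USE_AITER=0 python3 test_gpt_oss_1gpu.py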