Unverified Commit 91b3555d authored by Hubert Lu, committed by GitHub

Add tests to AMD CI for MI35x (#9662)


Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
parent 9e2f7252
@@ -28,6 +28,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -54,8 +55,9 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -70,7 +72,7 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh
       - name: Evaluate accuracy (TP=2)
-        timeout-minutes: 30
+        timeout-minutes: 60
         run: |
           bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
@@ -78,6 +80,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -102,6 +105,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -142,6 +146,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
     runs-on: ${{matrix.runner}}
@@ -176,6 +181,7 @@ jobs:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
     runs-on: ${{matrix.runner}}
@@ -242,12 +248,13 @@ jobs:
         run: |
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
-  unit-test-backend-2-gpu-amd:
+  unit-test-backend-1-gpu-amd-mi35x:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+        runner: [linux-mi35x-gpu-1]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -262,16 +269,17 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh
       - name: Run test
-        timeout-minutes: 40
+        timeout-minutes: 50
         run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
-  unit-test-backend-8-gpu-amd:
+  unit-test-backend-2-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
-        runner: [linux-mi300-gpu-8]
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -286,14 +294,15 @@ jobs:
         run: bash scripts/ci/amd_ci_install_dependency.sh
       - name: Run test
-        timeout-minutes: 60
+        timeout-minutes: 40
         run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
-  unit-test-backend-8-gpu-CAR-amd:
+  unit-test-backend-8-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
     strategy:
+      fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-8]
     runs-on: ${{matrix.runner}}
@@ -309,10 +318,10 @@ jobs:
       - name: Install dependencies
         run: bash scripts/ci/amd_ci_install_dependency.sh
-      - name: Run CustomAllReduce test
-        timeout-minutes: 20
+      - name: Run test
+        timeout-minutes: 60
         run: |
-          bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
   unit-test-sgl-kernel-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -350,8 +359,8 @@ jobs:
     needs: [
       accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
       accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
-      unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd,
-      unit-test-sgl-kernel-amd
+      unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
+      unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
     ]
     runs-on: ubuntu-latest
     steps:
......
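Note on the sharding flags above: `--auto-partition-id ${{ matrix.part }} --auto-partition-size 8` splits one suite across the jobs selected by `matrix.part` (presumably eight, given the size). A minimal sketch of the id/size contract, illustrative only; `run_suite.py` may instead balance shards by its per-file time estimates:

# Illustrative sketch (not run_suite.py's actual logic): shard a test list
# deterministically across N CI jobs via the --auto-partition-id/-size contract.
from typing import List

def auto_partition(files: List[str], part_id: int, part_size: int) -> List[str]:
    """Return the slice of `files` that job `part_id` out of `part_size` runs."""
    assert 0 <= part_id < part_size
    return [f for i, f in enumerate(files) if i % part_size == part_id]

tests = [f"test_{i}.py" for i in range(20)]
print(auto_partition(tests, part_id=3, part_size=8))  # every 8th test from index 3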
@@ -2027,7 +2027,10 @@ class DeepseekV2DecoderLayer(nn.Module):
         quant_format = (
             "mxfp4"
             if _is_gfx95_supported
-            and self.self_attn.fused_qkv_a_proj_with_mqa.weight == torch.uint8
+            and getattr(self.self_attn, "fused_qkv_a_proj_with_mqa", None) is not None
+            and getattr(self.self_attn.fused_qkv_a_proj_with_mqa, "weight", None)
+            is not None
+            and self.self_attn.fused_qkv_a_proj_with_mqa.weight.dtype == torch.uint8
             else ""
         )
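The rewritten condition fixes a check that was both unsafe (the layer may not carry `fused_qkv_a_proj_with_mqa`) and incorrect (it compared the weight tensor itself to `torch.uint8` instead of inspecting its dtype). A standalone sketch of the same defensive pattern, with an illustrative stand-in class:

# Sketch of the defensive attribute chain above (the class is illustrative).
import torch

class FakeAttn:
    pass  # may or may not carry fused_qkv_a_proj_with_mqa

def is_mxfp4_packed(attn) -> bool:
    proj = getattr(attn, "fused_qkv_a_proj_with_mqa", None)
    weight = getattr(proj, "weight", None)  # getattr on None still yields None
    # Comparing the tensor itself to torch.uint8 (the old code) is not a dtype
    # check; the dtype lives on weight.dtype.
    return weight is not None and weight.dtype == torch.uint8

attn = FakeAttn()
print(is_mxfp4_packed(attn))  # False, and no AttributeError
attn.fused_qkv_a_proj_with_mqa = torch.nn.Linear(4, 4)
print(is_mxfp4_packed(attn))  # False: float32 weights, not uint8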
@@ -2582,7 +2585,11 @@ class DeepseekV2ForCausalLM(nn.Module):
                 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
             ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
-            if _use_aiter_gfx95 and self.quant_config.get_name() == "quark":
+            if (
+                _use_aiter_gfx95
+                and self.quant_config is not None
+                and self.quant_config.get_name() == "quark"
+            ):
                 w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = (
                     quark_post_load_weights(self_attn, w, "mxfp4")
                 )
......
 #!/bin/bash
 set -euo pipefail
+# Detect GPU family from hostname (e.g., linux-mi35x-gpu-1-xxxxx-runner-zzzzz)
+HOSTNAME_VALUE=$(hostname)
+GPU_FAMILY=""
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+  GPU_FAMILY="${BASH_REMATCH[1]}"
+  echo "Detected GPU family from hostname: ${GPU_FAMILY}"
+else
+  echo "Warning: could not parse GPU family from '${HOSTNAME_VALUE}'"
+fi
 WORKDIR="/sglang-checkout/test/srt"
 declare -A ENV_MAP=(
   [SGLANG_AMD_CI]=1
@@ -8,6 +20,11 @@ declare -A ENV_MAP=(
   [SGLANG_USE_AITER]=1
 )
+# Conditionally add GPU_ARCHS only for mi35x
+if [[ "${GPU_FAMILY}" == "mi35x" ]]; then
+  ENV_MAP[GPU_ARCHS]="gfx950"
+fi
 # Parse -w/--workdir and -e ENV=VAL
 while [[ $# -gt 0 ]]; do
   case "$1" in
......
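Two details of `amd_ci_exec.sh` are worth illustrating. First, the hostname pattern: the same regex translated to Python and run against made-up hostnames, to show what `GPU_FAMILY` becomes:

# Python translation of the Bash pattern ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+;
# hostnames below are made up.
import re

PATTERN = re.compile(r"^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+")

for host in [
    "linux-mi35x-gpu-1-abc12-runner-xyz99",  # -> mi35x
    "linux-mi300-gpu-8-bgg8r-runner-vknlb",  # -> mi300
    "some-unrelated-host",                   # -> no match, GPU_FAMILY stays ""
]:
    m = PATTERN.match(host)
    print(host, "->", m.group(1) if m else "<no match>")

Second, the `-e ENV=VAL` flags that the workflow passes (e.g. `-e SGLANG_USE_AITER=0`) override the `ENV_MAP` defaults declared above. A simplified Python model of that merge contract, with flag parsing reduced to the essentials rather than the script's actual parser:

# Simplified model of the exec script's env handling: ENV_MAP defaults,
# overridden by -e KEY=VAL arguments.
from typing import Dict, List

def merge_env(argv: List[str]) -> Dict[str, str]:
    env = {"SGLANG_AMD_CI": "1", "SGLANG_USE_AITER": "1"}  # defaults
    it = iter(argv)
    for arg in it:
        if arg == "-e":  # assumes a KEY=VAL token follows
            key, _, val = next(it).partition("=")
            env[key] = val  # flags win over the defaults
    return env

print(merge_env(["-e", "SGLANG_USE_AITER=0"]))
# -> {'SGLANG_AMD_CI': '1', 'SGLANG_USE_AITER': '0'}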
 #!/bin/bash
 set -euo pipefail
+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x"  # default
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+  GPU_ARCH="${BASH_REMATCH[1]}"
+  echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+  echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
 # Install the required dependencies in CI.
 docker exec ci_sglang pip install --upgrade pip
 docker exec ci_sglang pip uninstall sgl-kernel -y || true
 docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
-docker exec ci_sglang pip install -e "python[dev_hip]"
+case "${GPU_ARCH}" in
+  mi35x)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+    docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps  # TODO: only for mi35x
+    # For lmms_evals evaluating MMMU
+    docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+    docker exec -w /lmms-eval ci_sglang pip install -e . --no-deps  # TODO: only for mi35x
+    ;;
+  mi30x|mi300|mi325)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+    docker exec ci_sglang pip install -e "python[dev_hip]"
+    # For lmms_evals evaluating MMMU
+    docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+    docker exec -w /lmms-eval ci_sglang pip install -e .
+    ;;
+  *)
+    echo "Runner architecture '${GPU_ARCH}' unrecognised;" >&2
+    ;;
+esac
 docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
 docker exec -w /human-eval ci_sglang pip install -e .
-# For lmms_evals evaluating MMMU
-docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
-docker exec -w /lmms-eval ci_sglang pip install -e .
 docker exec -w / ci_sglang mkdir -p /dummy-grok
 mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
 docker cp ./dummy-grok ci_sglang:/
......
@@ -25,130 +25,102 @@ else
   echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2
 fi
 # Default base tags (can be overridden by command line arguments)
 DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
 DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"
 # Parse command line arguments
-MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG"
-MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG"
+MI30X_BASE_TAG="${DEFAULT_MI30X_BASE_TAG}"
+MI35X_BASE_TAG="${DEFAULT_MI35X_BASE_TAG}"
 while [[ $# -gt 0 ]]; do
   case $1 in
-    --mi30x-base-tag)
-      MI30X_BASE_TAG="$2"
-      shift 2
-      ;;
-    --mi35x-base-tag)
-      MI35X_BASE_TAG="$2"
-      shift 2
-      ;;
+    --mi30x-base-tag) MI30X_BASE_TAG="$2"; shift 2;;
+    --mi35x-base-tag) MI35X_BASE_TAG="$2"; shift 2;;
     -h|--help)
       echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
-      echo "  --mi30x-base-tag TAG  Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)"
-      echo "  --mi35x-base-tag TAG  Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)"
       exit 0
       ;;
-    *)
-      echo "Unknown option $1"
-      echo "Use --help for usage information"
-      exit 1
-      ;;
+    *) echo "Unknown option $1"; exit 1;;
   esac
 done
+# Detect GPU architecture from the Kubernetes runner hostname
+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x"  # default
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+  GPU_ARCH="${BASH_REMATCH[1]}"
+  echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+  echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
+# Normalise / collapse architectures we don't yet build specifically for
+case "${GPU_ARCH}" in
+  mi35x)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+    ;;
+  mi30x|mi300|mi325)
+    echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+    GPU_ARCH="mi30x"
+    ;;
+  *)
+    echo "Runner architecture '${GPU_ARCH}' unrecognised; defaulting to mi30x image." >&2
+    GPU_ARCH="mi30x"
+    ;;
+esac
 # Set up DEVICE_FLAG based on Kubernetes pod info
-if [ -f "/etc/podinfo/gha-render-devices" ]; then
+if [[ -f /etc/podinfo/gha-render-devices ]]; then
   DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
 else
   DEVICE_FLAG="--device /dev/dri"
 fi
-# Find the latest image
+# Function to find latest available image for a given GPU architecture
 find_latest_image() {
   local gpu_arch=$1
-  local base_tag
-  if [ "$gpu_arch" == "mi30x" ]; then
-    base_tag="$MI30X_BASE_TAG"
-  elif [ "$gpu_arch" == "mi35x" ]; then
-    base_tag="$MI35X_BASE_TAG"
-  else
-    echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2
-    return 1
-  fi
-  local days_back=0
-  while [ $days_back -lt 7 ]; do
-    local check_date=$(date -d "$days_back days ago" +%Y%m%d)
-    local image_tag="${base_tag}-${check_date}"
+  local base_tag days_back image_tag
+  case "${gpu_arch}" in
+    mi30x) base_tag="${MI30X_BASE_TAG}" ;;
+    mi35x) base_tag="${MI35X_BASE_TAG}" ;;
+    *) echo "Error: unsupported GPU architecture '${gpu_arch}'" >&2; return 1 ;;
+  esac
+  for days_back in {0..6}; do
+    image_tag="${base_tag}-$(date -d "${days_back} days ago" +%Y%m%d)"
     echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2
-    # Check if the image exists by trying to get its manifest
     if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then
       echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
       echo "rocm/sgl-dev:${image_tag}"
       return 0
     fi
-    days_back=$((days_back + 1))
   done
-  echo "Error: No ${gpu_arch} image found in the last 7 days for version ${base_tag}" >&2
-  # Final fallback to specific hardcoded images
-  echo "Using final fallback images..." >&2
-  if [ "$gpu_arch" == "mi30x" ]; then
-    echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
-  elif [ "$gpu_arch" == "mi35x" ]; then
+  echo "Error: no ${gpu_arch} image found in the last 7 days for base ${base_tag}" >&2
+  echo "Using hard-coded fallback…" >&2
+  if [[ "${gpu_arch}" == "mi35x" ]]; then
     echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
   else
-    echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" # Default to mi30x
+    echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
   fi
-  return 0
 }
-# Determine image finder and fallback based on runner
-# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb)
-# Extract the GPU type from hostname
-HOSTNAME_VALUE=$(hostname)
-RUNNER_NAME="unknown"
-if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then
-  RUNNER_NAME="${BASH_REMATCH[1]}"
-  echo "Extracted runner from hostname: ${RUNNER_NAME}"
-else
-  echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}"
-fi
-echo "The runner is: ${RUNNER_NAME}"
-GPU_ARCH="mi30x"
-# Check for mi350/mi355 runners
-if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then
-  echo "Runner is ${RUNNER_NAME}, will find mi35x image."
-  GPU_ARCH="mi35x"
-# Check for mi300/mi325 runners
-elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then
-  echo "Runner is ${RUNNER_NAME}, will find mi30x image."
-else
-  echo "Runner type not recognized: '${RUNNER_NAME}'"
-  echo "Defaulting to find mi30x image"
-fi
-# Find and pull the latest image
+# Pull and run the latest image
 IMAGE=$(find_latest_image "${GPU_ARCH}")
-echo "Pulling Docker image: $IMAGE"
-docker pull "$IMAGE"
-# Run the container
-echo "Starting container: ci_sglang"
-docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+echo "Pulling Docker image: ${IMAGE}"
+docker pull "${IMAGE}"
+echo "Launching container: ci_sglang"
+docker run -dt --user root --device=/dev/kfd ${DEVICE_FLAG} \
   -v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
   --ipc=host --group-add video \
   --shm-size 32g \
@@ -157,4 +129,4 @@ docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
   --security-opt seccomp=unconfined \
   -w /sglang-checkout \
   --name ci_sglang \
-  "$IMAGE"
+  "${IMAGE}"
@@ -243,6 +243,10 @@ suite_amd = {
         TestFile("test_wave_attention_kernels.py", 2),
         TestFile("test_wave_attention_backend.py", 150),
     ],
+    "per-commit-amd-mi35x": [
+        TestFile("test_mla.py", 242),
+        TestFile("test_gpt_oss_1gpu.py", 600),
+    ],
     "per-commit-2-gpu-amd": [
         TestFile("lora/test_lora_tp.py", 116),
         TestFile("rl/test_update_weights_from_distributed.py", 103),
......
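For context on the new suite entry: in `run_suite.py`, each `TestFile` pairs a test path with a number used as a per-file runtime estimate in seconds (an assumption worth verifying against the file itself). A minimal model of the registry shape:

# Minimal model of the suite registry; the estimated-seconds meaning of the
# second field is an assumption.
from dataclasses import dataclass

@dataclass
class TestFile:
    name: str
    estimated_time: int = 60

suites = {
    "per-commit-amd-mi35x": [
        TestFile("test_mla.py", 242),
        TestFile("test_gpt_oss_1gpu.py", 600),
    ],
}

budget = sum(t.estimated_time for t in suites["per-commit-amd-mi35x"])
print(f"mi35x suite estimate: {budget}s")  # 842s, within the 50-minute job timeout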
+import os
 from concurrent.futures import ThreadPoolExecutor
 from types import SimpleNamespace
 from typing import Dict, List, Literal, Optional
-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_hip, kill_process_tree
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -14,6 +15,7 @@ from sglang.test.test_utils import (
 )
 _base_url = DEFAULT_URL_FOR_TEST
+_is_hip = is_hip()
 class BaseTestGptOss(CustomTestCase):
@@ -36,7 +38,8 @@ class BaseTestGptOss(CustomTestCase):
         if model_variant == "20b":
             other_args += ["--cuda-graph-max-bs", "600"]
+        if _is_hip:
+            os.environ["SGLANG_USE_AITER"] = "0"
         self._run_test_raw(
             model=model,
             expected_score_of_reasoning_effort=expected_score_of_reasoning_effort,
......
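One subtlety in the HIP gating above: `os.environ` must be mutated before `_run_test_raw` launches the server so the child process inherits `SGLANG_USE_AITER=0`. A self-contained demonstration of that inheritance:

# Environment variables set via os.environ are inherited by subprocesses
# spawned afterwards, which is why the assignment precedes the server launch.
import os
import subprocess
import sys

os.environ["SGLANG_USE_AITER"] = "0"
child = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['SGLANG_USE_AITER'])"],
    capture_output=True,
    text=True,
)
print(child.stdout.strip())  # -> 0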