Unverified commit 566ade03, authored by Baizhou Zhang and committed by GitHub
Browse files

[CI] Build aarch64 kernels for sgl-kernel test (#12480)

parent 69193f71
...@@ -68,8 +68,9 @@ jobs: ...@@ -68,8 +68,9 @@ jobs:
include: include:
- python-version: "3.10" - python-version: "3.10"
cuda-version: "12.9" cuda-version: "12.9"
- python-version: "3.10" # Add back when CUDA 13.0 is supported on CI
cuda-version: "13.0" # - python-version: "3.10"
# cuda-version: "13.0"
name: Build Wheel name: Build Wheel
steps: steps:
- name: Cleanup - name: Cleanup
...@@ -96,6 +97,41 @@ jobs: ...@@ -96,6 +97,41 @@ jobs:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/* path: sgl-kernel/dist/*
# Builds the sgl-kernel wheel on the Arm build runner and publishes it as an
# artifact whose name ends in "-aarch64".
sgl-kernel-build-wheels-arm:
  name: Build Wheel Arm
  needs:
    - check-changes
  # Run only when check-changes reports sgl_kernel == 'true'.
  if: needs.check-changes.outputs.sgl_kernel == 'true'
  # NOTE(review): presumably a self-hosted aarch64 runner label — confirm.
  runs-on: arm-kernel-build-node
  strategy:
    matrix:
      # Versions stay quoted so "3.10" is not read as the float 3.1.
      include:
        - cuda-version: "12.9"
          python-version: "3.10"
  steps:
    - name: Cleanup
      # Best-effort wipe of the workspace; `|| true` keeps a failing rm from
      # failing the step.
      run: |
        sudo rm -rf $GITHUB_WORKSPACE/* || true

    - uses: actions/checkout@v4
      with:
        submodules: recursive

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      # build.sh is run from sgl-kernel/ with the Python and CUDA versions as
      # its two positional arguments.
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        # The "-aarch64" suffix is what the gb200 test job's download pattern
        # (wheel-python3.10-cuda12.9-aarch64) matches; keep the two in sync.
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
        path: sgl-kernel/dist/*
sgl-kernel-unit-test: sgl-kernel-unit-test:
needs: [check-changes, sgl-kernel-build-wheels] needs: [check-changes, sgl-kernel-build-wheels]
if: needs.check-changes.outputs.sgl_kernel == 'true' if: needs.check-changes.outputs.sgl_kernel == 'true'
...@@ -785,7 +821,7 @@ jobs: ...@@ -785,7 +821,7 @@ jobs:
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600 python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
unit-test-backend-4-gpu-gb200: unit-test-backend-4-gpu-gb200:
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels-arm]
if: always() && !failure() && !cancelled() && if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 4-gpu-gb200 runs-on: 4-gpu-gb200
...@@ -801,7 +837,7 @@ jobs: ...@@ -801,7 +837,7 @@ jobs:
with: with:
path: sgl-kernel/dist/ path: sgl-kernel/dist/
merge-multiple: true merge-multiple: true
pattern: wheel-python3.10-cuda12.9 pattern: wheel-python3.10-cuda12.9-aarch64
- name: Install dependencies - name: Install dependencies
run: | run: |
......
...@@ -105,8 +105,13 @@ echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNE ...@@ -105,8 +105,13 @@ echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNE
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
ls -alh sgl-kernel/dist ls -alh sgl-kernel/dist
# TODO: Currently we don't support custom build sgl-kernel for aarch64. To be changed after kernel build for aarch64 is added. # Determine wheel architecture
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
WHEEL_ARCH="aarch64"
else
WHEEL_ARCH="x86_64"
fi
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_${WHEEL_ARCH}.whl --force-reinstall $PIP_INSTALL_SUFFIX
else else
$PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
fi fi
......
...@@ -105,3 +105,5 @@ m.impl("fwd", torch::kCUDA, make_pytorch_shim(&mha_fwd)); ...@@ -105,3 +105,5 @@ m.impl("fwd", torch::kCUDA, make_pytorch_shim(&mha_fwd));
## FAQ ## FAQ
- Q: Segmentation fault with CUDA 12.6 - Q: Segmentation fault with CUDA 12.6
- A: Update ptxas to 12.8, reference: [segment fault error](https://github.com/Dao-AILab/flash-attention/issues/1453) - A: Update ptxas to 12.8, reference: [segment fault error](https://github.com/Dao-AILab/flash-attention/issues/1453)
Trigger arm build test (Should be removed later)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment