Unverified Commit d645ae90 authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Rename runner labels (#11228)

parent 41763ba0
...@@ -28,7 +28,7 @@ permissions: ...@@ -28,7 +28,7 @@ permissions:
jobs: jobs:
test-disaggregation: test-disaggregation:
if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark')) if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
runs-on: [h200] runs-on: [8-gpu-h200-oracle]
timeout-minutes: 45 timeout-minutes: 45
steps: steps:
......
...@@ -83,7 +83,7 @@ jobs: ...@@ -83,7 +83,7 @@ jobs:
pytest-rust: pytest-rust:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: BM.A10.4 runs-on: 4-gpu-a10
timeout-minutes: 25 timeout-minutes: 25
steps: steps:
- name: Checkout code - name: Checkout code
......
...@@ -62,7 +62,7 @@ jobs: ...@@ -62,7 +62,7 @@ jobs:
sgl-kernel-build-wheels: sgl-kernel-build-wheels:
needs: [check-changes] needs: [check-changes]
if: needs.check-changes.outputs.sgl_kernel == 'true' if: needs.check-changes.outputs.sgl_kernel == 'true'
runs-on: sgl-kernel-build-node runs-on: x64-kernel-build-node
strategy: strategy:
matrix: matrix:
include: include:
...@@ -323,7 +323,7 @@ jobs: ...@@ -323,7 +323,7 @@ jobs:
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() && if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 8-gpu-runner runs-on: 8-gpu-h200
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
...@@ -641,7 +641,7 @@ jobs: ...@@ -641,7 +641,7 @@ jobs:
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() && if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 8-gpu-runner runs-on: 8-gpu-h200
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
...@@ -668,7 +668,7 @@ jobs: ...@@ -668,7 +668,7 @@ jobs:
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() && if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 4-b200-runner runs-on: 4-gpu-b200
strategy: strategy:
fail-fast: false fail-fast: false
steps: steps:
...@@ -702,7 +702,8 @@ jobs: ...@@ -702,7 +702,8 @@ jobs:
unit-test-frontend, unit-test-backend-1-gpu, unit-test-frontend, unit-test-backend-1-gpu,
unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu, unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu, performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3,
performance-test-2-gpu,
accuracy-test-1-gpu, accuracy-test-2-gpu, accuracy-test-1-gpu, accuracy-test-2-gpu,
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu, unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
unit-test-backend-4-gpu-b200, unit-test-backend-4-gpu-b200,
......
...@@ -8,7 +8,7 @@ on: ...@@ -8,7 +8,7 @@ on:
jobs: jobs:
build-dev-x86: build-dev-x86:
if: ${{ github.repository == 'sgl-project/sglang' }} if: ${{ github.repository == 'sgl-project/sglang' }}
runs-on: nvidia runs-on: x64-docker-build-node
strategy: strategy:
matrix: matrix:
variant: variant:
...@@ -48,12 +48,12 @@ jobs: ...@@ -48,12 +48,12 @@ jobs:
build-dev-arm: build-dev-arm:
if: ${{ github.repository == 'sgl-project/sglang' }} if: ${{ github.repository == 'sgl-project/sglang' }}
runs-on: sgl-kernel-release-node-arm runs-on: arm-docker-build-node
strategy: strategy:
matrix: matrix:
variant: variant:
- version: 12.9.1 - version: 12.9.1
type: blackwell_aarch64 type: all_aarch64
tag: dev-arm64 tag: dev-arm64
steps: steps:
- name: Delete huge unnecessary tools folder - name: Delete huge unnecessary tools folder
......
...@@ -14,13 +14,9 @@ jobs: ...@@ -14,13 +14,9 @@ jobs:
strategy: strategy:
matrix: matrix:
variant: variant:
- cuda_version: "12.6.1"
build_type: "all"
- cuda_version: "12.8.1"
build_type: "blackwell"
- cuda_version: "12.9.1" - cuda_version: "12.9.1"
build_type: "blackwell" build_type: "all"
runs-on: nvidia runs-on: x64-docker-build-node
steps: steps:
- name: Delete huge unnecessary tools folder - name: Delete huge unnecessary tools folder
run: rm -rf /opt/hostedtoolcache run: rm -rf /opt/hostedtoolcache
...@@ -67,8 +63,6 @@ jobs: ...@@ -67,8 +63,6 @@ jobs:
if [ "${{ matrix.variant.build_type }}" = "all" ]; then if [ "${{ matrix.variant.build_type }}" = "all" ]; then
tag_suffix="" tag_suffix=""
elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then
tag_suffix="-b200"
else else
echo "Unsupported build type" echo "Unsupported build type"
exit 1 exit 1
...@@ -87,8 +81,8 @@ jobs: ...@@ -87,8 +81,8 @@ jobs:
matrix: matrix:
variant: variant:
- cuda_version: "12.9.1" - cuda_version: "12.9.1"
build_type: "blackwell_aarch64" build_type: "all_aarch64"
runs-on: sgl-kernel-release-node-arm runs-on: arm-docker-build-node
steps: steps:
- name: Delete huge unnecessary tools folder - name: Delete huge unnecessary tools folder
run: rm -rf /opt/hostedtoolcache run: rm -rf /opt/hostedtoolcache
......
# Builds the sgl-kernel wheel for the Python/CUDA combination in the matrix
# below and attaches it to a release in the sgl-project/whl repository, then
# refreshes the wheel index kept in that same repository.
name: Release SGLang Kernel Wheel (cu118)

on:
  # Manual run; `tag_name` optionally overrides the tag derived from version.py.
  workflow_dispatch:
    inputs:
      tag_name:
        type: string
  # Automatic run whenever the kernel version file changes on main.
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py

jobs:
  build-wheels:
    # Only run in the upstream repository, never in forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        # Quoted so the versions stay strings (e.g. "3.10" must not become 3.1).
        python-version: ["3.9"]
        cuda-version: ["11.8"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      # The artifact name starts with "wheel-" so the release job can select it
      # with its `wheel-*` download pattern.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  release:
    needs: build-wheels
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        # The dispatch input is handed to the script through the environment
        # instead of being expanded inline, so its content is never parsed as
        # shell code. It is empty on push-triggered runs.
        env:
          INPUT_TAG_NAME: ${{ inputs.tag_name }}
        run: |
          if [ -z "$INPUT_TAG_NAME" ]; then
            # Fall back to "v" + the first double-quoted string in version.py.
            TAG_NAME="v$(cut -d'"' -f2 sgl-kernel/python/sgl_kernel/version.py)"
          else
            TAG_NAME="$INPUT_TAG_NAME"
          fi
          echo "tag_name=$TAG_NAME" >> "$GITHUB_OUTPUT"

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      # The token is supplied via env so it is expanded by the shell rather
      # than written into the script text.
      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      # NOTE(review): presumably this script rewrites the index inside the
      # sgl-whl checkout cloned above -- confirm against the script itself.
      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push
...@@ -19,7 +19,7 @@ concurrency: ...@@ -19,7 +19,7 @@ concurrency:
jobs: jobs:
build-cu129: build-cu129:
if: github.repository == 'sgl-project/sglang' if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node runs-on: x64-kernel-build-node
strategy: strategy:
matrix: matrix:
python-version: ["3.10"] python-version: ["3.10"]
...@@ -46,117 +46,14 @@ jobs: ...@@ -46,117 +46,14 @@ jobs:
pip install twine pip install twine
python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
build-cu124:
if: github.repository == 'sgl-project/sglang'
needs: build-cu129
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.4"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/* path: sgl-kernel/dist/*
release-cu124: release-cu129:
needs: build-cu124
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*
- name: Set tag name
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
fi
- name: Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
repository: sgl-project/whl
token: ${{ secrets.WHL_TOKEN }}
files: |
sgl-kernel/dist/*
- name: Clone wheel index
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py --cuda 124
- name: Push wheel index
run: |
cd sgl-whl
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -A
git commit -m "update whl index"
git push
build-cu128:
if: github.repository == 'sgl-project/sglang'
needs: build-cu129 needs: build-cu129
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.8"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*
release-cu128:
needs: build-cu128
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -193,20 +90,20 @@ jobs: ...@@ -193,20 +90,20 @@ jobs:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }} WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index - name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py --cuda 128 run: python3 scripts/update_kernel_whl_index.py --cuda 129
- name: Push wheel index - name: Push wheel index
run: | run: |
cd sgl-whl cd sgl-whl
git config --local user.name "github-actions[bot]" git config --local user.name "sglang-bot"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" git config --local user.email "sglangbot@gmail.com"
git add -A git add -A
git commit -m "update whl index" git commit -m "update whl index"
git push git push
build-cu129-aarch64: build-cu129-aarch64:
if: github.repository == 'sgl-project/sglang' if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node-arm runs-on: arm-kernel-build-node
strategy: strategy:
matrix: matrix:
python-version: ["3.10"] python-version: ["3.10"]
...@@ -282,8 +179,8 @@ jobs: ...@@ -282,8 +179,8 @@ jobs:
- name: Push wheel index - name: Push wheel index
run: | run: |
cd sgl-whl cd sgl-whl
git config --local user.name "github-actions[bot]" git config --local user.name "sglang-bot"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" git config --local user.email "sglangbot@gmail.com"
git add -A git add -A
git commit -m "update whl index" git commit -m "update whl index"
git push git push
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
You can install SGLang using one of the methods below. You can install SGLang using one of the methods below.
This page primarily applies to common NVIDIA GPU platforms. This page primarily applies to common NVIDIA GPU platforms.
For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md). For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
## Method 1: With pip or uv ## Method 1: With pip or uv
......
# Blackwell GPUs
Pre-built wheels will be released soon. Until then, please compile from source or use the Blackwell Docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
## B200 with x86 CPUs
TODO
## GB200/GB300 with ARM CPUs
TODO
...@@ -91,9 +91,14 @@ tracing = [ ...@@ -91,9 +91,14 @@ tracing = [
"opentelemetry-sdk", "opentelemetry-sdk",
] ]
all = ["sglang[test]", "sglang[decord]"] all = ["sglang[test]", "sglang[decord]"]
all_aarch64 = ["sglang[test]"]
dev = ["sglang[test]", "sglang[decord]"]
# The following will be deprecated in 2 weeks
blackwell = ["sglang[test]", "sglang[decord]"] blackwell = ["sglang[test]", "sglang[decord]"]
blackwell_aarch64 = ["sglang[test]"] blackwell_aarch64 = ["sglang[test]"]
dev = ["sglang[test]", "sglang[decord]"]
[project.urls] [project.urls]
"Homepage" = "https://github.com/sgl-project/sglang" "Homepage" = "https://github.com/sgl-project/sglang"
......
...@@ -3,21 +3,16 @@ ...@@ -3,21 +3,16 @@
set -euxo pipefail set -euxo pipefail
IS_BLACKWELL=${IS_BLACKWELL:-0} IS_BLACKWELL=${IS_BLACKWELL:-0}
CU_VERSION="cu128"
if [ "$IS_BLACKWELL" = "1" ]; then
CU_VERSION="cu129"
else
CU_VERSION="cu126"
fi
# Clear torch compilation cache
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
# Kill existing processes # Kill existing processes
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
bash "${SCRIPT_DIR}/../killall_sglang.sh" bash "${SCRIPT_DIR}/../killall_sglang.sh"
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}" echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
# Clear torch compilation cache
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
# Install apt packages # Install apt packages
apt install -y git libnuma-dev apt install -y git libnuma-dev
...@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then ...@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
PIP_INSTALL_SUFFIX="--break-system-packages" PIP_INSTALL_SUFFIX="--break-system-packages"
# Clean up existing installations # Clean up existing installations
$PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
else else
# In normal cases, we use uv, which is much faster than pip. # In normal cases, we use uv, which is much faster than pip.
pip install --upgrade pip pip install --upgrade pip
...@@ -40,7 +35,7 @@ else ...@@ -40,7 +35,7 @@ else
PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match" PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
# Clean up existing installations # Clean up existing installations
$PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
fi fi
# Install the main package # Install the main package
...@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org ...@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
# Install router for pd-disagg test # Install router for pd-disagg test
SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
# Install sgl-kernel
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml) SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml) SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}" echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
if [ "$IS_BLACKWELL" = "1" ]; then
SGL_KERNEL_CUDA_VERSION=cu128
else
SGL_KERNEL_CUDA_VERSION=cu124
fi
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
ls -alh sgl-kernel/dist ls -alh sgl-kernel/dist
WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true) $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
if [ -f "$WHEEL_FILE" ]; then
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
else
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
fi
else else
$PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
fi fi
# Show current packages # Show current packages
...@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then ...@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
$PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
fi fi
# Install FlashMLA for attention backend tests
# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
# Show current packages # Show current packages
$PIP_CMD list $PIP_CMD list
python3 -c "import torch; print(torch.version.cuda)"
if [ -n "${HF_TOKEN:-}" ]; then
$PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
hf auth login --token $HF_TOKEN
fi
...@@ -25,7 +25,6 @@ make build ...@@ -25,7 +25,6 @@ make build
``` ```
Note: Note:
The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`. The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
### Build with [ccache](https://github.com/ccache/ccache) ### Build with [ccache](https://github.com/ccache/ccache)
......
...@@ -88,7 +88,7 @@ suites = { ...@@ -88,7 +88,7 @@ suites = {
TestFile("test_metrics.py", 32), TestFile("test_metrics.py", 32),
TestFile("test_metrics_utils.py", 1), TestFile("test_metrics_utils.py", 1),
TestFile("test_mla.py", 167), TestFile("test_mla.py", 167),
TestFile("test_mla_deepseek_v3.py", 1420), TestFile("test_mla_deepseek_v3.py", 500),
TestFile("test_mla_int8_deepseek_v3.py", 429), TestFile("test_mla_int8_deepseek_v3.py", 429),
TestFile("test_mla_flashinfer.py", 302), TestFile("test_mla_flashinfer.py", 302),
TestFile("test_mla_fp8.py", 93), TestFile("test_mla_fp8.py", 93),
...@@ -130,7 +130,7 @@ suites = { ...@@ -130,7 +130,7 @@ suites = {
TestFile("lora/test_lora_tp.py", 116), TestFile("lora/test_lora_tp.py", 116),
TestFile("rl/test_update_weights_from_distributed.py", 103), TestFile("rl/test_update_weights_from_distributed.py", 103),
TestFile("test_data_parallelism.py", 73), TestFile("test_data_parallelism.py", 73),
TestFile("test_dp_attention.py", 277), TestFile("test_dp_attention.py", 594),
TestFile("test_load_weights_from_remote_instance.py", 72), TestFile("test_load_weights_from_remote_instance.py", 72),
TestFile("test_patch_torch.py", 19), TestFile("test_patch_torch.py", 19),
TestFile("test_release_memory_occupation.py", 257), TestFile("test_release_memory_occupation.py", 257),
...@@ -138,17 +138,16 @@ suites = { ...@@ -138,17 +138,16 @@ suites = {
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200), TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
], ],
"per-commit-4-gpu": [ "per-commit-4-gpu": [
TestFile("test_gpt_oss_4gpu.py", 600), TestFile("test_gpt_oss_4gpu.py", 300),
TestFile("test_local_attn.py", 250), TestFile("test_local_attn.py", 411),
TestFile("test_pp_single_node.py", 372), TestFile("test_pp_single_node.py", 481),
TestFile("models/test_qwen3_next_models.py", 200), TestFile("models/test_qwen3_next_models.py", 291),
TestFile("models/test_falcon_h1_models.py", 200),
TestFile("test_multi_instance_release_memory_occupation.py", 64), TestFile("test_multi_instance_release_memory_occupation.py", 64),
], ],
"per-commit-8-gpu": [ "per-commit-8-gpu": [
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400), TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
TestFile("lora/test_lora_llama4.py", 400), TestFile("lora/test_lora_llama4.py", 400),
TestFile("test_disaggregation.py", 600), TestFile("test_disaggregation.py", 499),
TestFile("test_disaggregation_dp_attention.py", 155), TestFile("test_disaggregation_dp_attention.py", 155),
TestFile("test_disaggregation_different_tp.py", 600), TestFile("test_disaggregation_different_tp.py", 600),
TestFile("test_disaggregation_pp.py", 140), TestFile("test_disaggregation_pp.py", 140),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment