fix: solve release issue (#5434)

88defc4d · Yineng Zhang · GitHub · 6f509d55 · 6f509d55 · 88defc4d
Unverified commit 88defc4d, authored Apr 15, 2025 by Yineng Zhang; committed by GitHub on Apr 15, 2025.
5 changed files
--- a/.github/workflows/release-pypi-kernel.yml
+++ b/.github/workflows/release-pypi-kernel.yml
-name: Release SGLang Kernel to PyPI
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - sgl-kernel/python/sgl_kernel/version.py
-  workflow_dispatch:
-concurrency:
-  group: release-pypi-kernel-${{ github.ref }}
-  cancel-in-progress: true
-jobs:
-  build-wheels:
-    if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node
-    strategy:
-      matrix:
-        python-version: ['3.9']
-        cuda-version: ['12.4']
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        submodules: 'recursive'
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
-      run: |
-        cd sgl-kernel
-        chmod +x ./build.sh
-        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
-    - name: Upload to pypi
-      working-directory: sgl-kernel
-      run: |
-        pip install twine
-        python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
--- a/.github/workflows/release-whl-kernel-cu128.yml
+++ b/.github/workflows/release-whl-kernel-cu128.yml
-name: Release SGLang Kernel Wheel (cu128)
+name: Release SGLang Kernel Wheel (cu118)
 on:
  workflow_dispatch:
@@ -18,7 +18,7 @@ jobs:
    strategy:
      matrix:
        python-version: ['3.9']
-        cuda-version: ['12.8']
+        cuda-version: ['11.8']
    steps:
      - uses: actions/checkout@v4
@@ -80,7 +80,7 @@ jobs:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py --cuda 128
+        run: python3 scripts/update_kernel_whl_index.py
      - name: Push wheel index
        run: |

--- a/.github/workflows/release-whl-kernel.yml
+++ b/.github/workflows/release-whl-kernel.yml
-name: Release SGLang Kernel Wheel (cu118)
+name: Release SGLang Kernels
 on:
-  workflow_dispatch:
-    inputs:
-      tag_name:
-        type: string
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py
+  workflow_dispatch:
+    inputs:
+      tag_name:
+        type: string
+        required: false
+concurrency:
+  group: release-sglang-kernels-${{ github.ref }}
+  cancel-in-progress: true
 jobs:
-  build-wheels:
+  build-cu124:
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ['3.9']
-        cuda-version: ['11.8']
+        cuda-version: ['12.4']
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: 'recursive'
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Build wheels
+        run: |
+          cd sgl-kernel
+          chmod +x ./build.sh
+          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
+      - name: Upload to PyPI
+        working-directory: sgl-kernel
+        run: |
+          pip install twine
+          python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
+  build-cu128:
+    if: github.repository == 'sgl-project/sglang'
+    needs: build-cu124
+    runs-on: sgl-kernel-release-node
+    strategy:
+      matrix:
+        python-version: ['3.9']
+        cuda-version: ['12.8']
    steps:
      - uses: actions/checkout@v4
        with:
@@ -30,7 +64,7 @@ jobs:
        with:
          python-version: ${{ matrix.python-version }}
-      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
+      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
@@ -43,7 +77,7 @@ jobs:
          path: sgl-kernel/dist/*
  release:
-    needs: build-wheels
+    needs: build-cu128
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
@@ -80,7 +114,7 @@ jobs:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py
+        run: python3 scripts/update_kernel_whl_index.py --cuda 128
      - name: Push wheel index
        run: |

--- a/docker/Dockerfile.blackwell
+++ b/docker/Dockerfile.blackwell
@@ -6,7 +6,7 @@ WORKDIR /sgl-workspace
 RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
-RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.0.9/sgl_kernel-0.0.9+cu128-cp39-abi3-manylinux2014_x86_64.whl \
+RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.0.9.post1/sgl_kernel-0.0.9.post1+cu128-cp39-abi3-manylinux2014_x86_64.whl \
    && pip3 install setuptools==75.0.0 wheel==0.41.0 scikit-build-core
 RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \

--- a/sgl-kernel/build.sh
+++ b/sgl-kernel/build.sh
@@ -35,8 +35,6 @@ docker run --rm \
   ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
   export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
   export CUDA_VERSION=${CUDA_VERSION} && \
-   export CMAKE_BUILD_PARALLEL_LEVEL=96
-   export MAX_JOBS=96
   mkdir -p /usr/lib/x86_64-linux-gnu/ && \
   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
   cd /sgl-kernel && \