Unverified Commit 812e82f3 authored by Yineng Zhang's avatar Yineng Zhang Committed by GitHub
Browse files

fix: solve cu118 issue for cutlass mla (#5331)

parent 4879e50c
...@@ -35,9 +35,14 @@ jobs: ...@@ -35,9 +35,14 @@ jobs:
runs-on: sgl-kernel-build-node runs-on: sgl-kernel-build-node
strategy: strategy:
matrix: matrix:
python-version: ['3.9'] include:
cuda-version: ['12.4'] - python-version: '3.9'
cuda-version: '11.8'
- python-version: '3.9'
cuda-version: '12.4'
- python-version: '3.9'
cuda-version: '12.8'
name: Build Wheel (CUDA ${{ matrix.cuda-version }})
steps: steps:
- name: Cleanup - name: Cleanup
run: | run: |
...@@ -52,13 +57,14 @@ jobs: ...@@ -52,13 +57,14 @@ jobs:
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
run: | run: |
cd sgl-kernel cd sgl-kernel
chmod +x ./build.sh chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts - name: Upload artifacts (only for CUDA 12.4)
if: ${{ matrix.cuda-version == '12.4' }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
...@@ -128,7 +134,7 @@ jobs: ...@@ -128,7 +134,7 @@ jobs:
pip3 uninstall sgl-kernel -y pip3 uninstall sgl-kernel -y
finish: finish:
needs: [unit-test, mla-test, lint] needs: [unit-test, mla-test, lint, build-wheels]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Check all dependent job statuses - name: Check all dependent job statuses
......
...@@ -14,7 +14,7 @@ on: ...@@ -14,7 +14,7 @@ on:
jobs: jobs:
build-wheels: build-wheels:
if: github.repository == 'sgl-project/sglang' if: github.repository == 'sgl-project/sglang'
runs-on: ubuntu-latest runs-on: sgl-kernel-build-node
strategy: strategy:
matrix: matrix:
python-version: ['3.9'] python-version: ['3.9']
......
...@@ -25,6 +25,8 @@ limitations under the License. ...@@ -25,6 +25,8 @@ limitations under the License.
#include <device/sm100_mla.hpp> #include <device/sm100_mla.hpp>
#include <kernel/sm100_mla_tile_scheduler.hpp> #include <kernel/sm100_mla_tile_scheduler.hpp>
#if defined CUDA_VERSION && CUDA_VERSION >= 12040
#define CUTLASS_CHECK(status) \ #define CUTLASS_CHECK(status) \
{ \ { \
cutlass::Status error = status; \ cutlass::Status error = status; \
...@@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches, ...@@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches,
return MlaSm100Type::Fmha::get_workspace_size(arguments); return MlaSm100Type::Fmha::get_workspace_size(arguments);
} }
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment