"vscode:/vscode.git/clone" did not exist on "6efd2ca11b059c90b5f12539eb2f6aa5f96af724"
Unverified commit f265d15b, authored by Yineng Zhang and committed by GitHub
Browse files

use self-hosted to build sgl-kernel (#3154)

parent 02431b9a
......@@ -32,13 +32,17 @@ jobs:
build-wheels:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: ubuntu-latest
runs-on: sgl-kernel-build-node
strategy:
matrix:
python-version: ['3.9']
cuda-version: ['12.4']
steps:
- name: Cleanup
run: |
sudo rm -rf $GITHUB_WORKSPACE/* || true
- uses: actions/checkout@v4
with:
submodules: 'recursive'
......
......@@ -15,6 +15,7 @@ docker run --rm \
pytorch/manylinux-builder:cuda${CUDA_VERSION} \
bash -c "
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.} && \
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja && \
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
export CUDA_VERSION=${CUDA_VERSION} && \
export SGL_KERNEL_ENABLE_BF16=1 && \
......
import multiprocessing
import os
from pathlib import Path
......@@ -70,6 +71,8 @@ nvcc_flags = [
"-std=c++17",
"-use_fast_math",
"-DFLASHINFER_ENABLE_F16",
"-Xcompiler",
"-w",
]
nvcc_flags_fp8 = [
"-DFLASHINFER_ENABLE_FP8",
......@@ -151,7 +154,11 @@ setup(
packages=find_packages(),
package_dir={"": "src"},
ext_modules=ext_modules,
cmdclass={"build_ext": BuildExtension},
cmdclass={
"build_ext": BuildExtension.with_options(
use_ninja=True, max_jobs=multiprocessing.cpu_count()
)
},
options={"bdist_wheel": {"py_limited_api": "cp39"}},
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment