Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
4e7f0252
Unverified
Commit
4e7f0252
authored
Aug 08, 2025
by
ishandhanani
Committed by
GitHub
Aug 08, 2025
Browse files
chore(gb200): update to CUDA 12.9 and improve build process (#8772)
parent
36bfddec
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
86 additions
and
50 deletions
+86
-50
.github/workflows/release-docker-gb200.yml
.github/workflows/release-docker-gb200.yml
+3
-3
.github/workflows/release-whl-kernel-aarch64.yml
.github/workflows/release-whl-kernel-aarch64.yml
+7
-7
docker/Dockerfile
docker/Dockerfile
+19
-13
docker/Dockerfile.gb200
docker/Dockerfile.gb200
+36
-24
python/sglang/srt/entrypoints/engine.py
python/sglang/srt/entrypoints/engine.py
+1
-1
sgl-kernel/build.sh
sgl-kernel/build.sh
+7
-0
sgl-kernel/rename_wheels.sh
sgl-kernel/rename_wheels.sh
+13
-2
No files found.
.github/workflows/release-docker-gb200.yml
View file @
4e7f0252
...
...
@@ -11,7 +11,7 @@ jobs:
publish
:
if
:
github.repository == 'sgl-project/sglang'
runs-on
:
ubuntu-22.04-arm
environment
:
'
prod
'
environment
:
"
prod
"
steps
:
-
name
:
Delete huge unnecessary tools folder
run
:
rm -rf /opt/hostedtoolcache
...
...
@@ -31,6 +31,6 @@ jobs:
-
name
:
Build and Push
run
:
|
version=$(cat python/sglang/version.py | cut -d'"' -f2)
tag=v${version}-cu12
8
-gb200
tag=v${version}-cu12
9
-gb200
docker buildx build --platform linux/arm64 --push --output type=image -t lmsysorg/sglang:${tag} -f docker/Dockerfile.gb200 --build-arg CUDA_VERSION=12.
8
.1 --build-arg BUILD_TYPE=blackwell --no-cache .
docker buildx build --platform linux/arm64 --push --output type=image -t lmsysorg/sglang:${tag} -f docker/Dockerfile.gb200 --build-arg CUDA_VERSION=12.
9
.1 --build-arg BUILD_TYPE=blackwell --no-cache .
.github/workflows/release-whl-kernel-aarch64.yml
View file @
4e7f0252
...
...
@@ -17,17 +17,17 @@ concurrency:
cancel-in-progress
:
true
jobs
:
build-cu12
8
-aarch64
:
build-cu12
9
-aarch64
:
if
:
github.repository == 'sgl-project/sglang'
runs-on
:
sgl-kernel-release-node
runs-on
:
sgl-kernel-release-node
-arm
strategy
:
matrix
:
python-version
:
[
'
3.
9'
]
cuda-version
:
[
'
12.
8'
]
python-version
:
[
"
3.
10"
]
cuda-version
:
[
"
12.
9"
]
steps
:
-
uses
:
actions/checkout@v4
with
:
submodules
:
'
recursive
'
submodules
:
"
recursive
"
-
name
:
Set up Python ${{ matrix.python-version }}
uses
:
actions/setup-python@v5
...
...
@@ -47,7 +47,7 @@ jobs:
path
:
sgl-kernel/dist/*
release
:
needs
:
build-cu12
8
-aarch64
needs
:
build-cu12
9
-aarch64
runs-on
:
ubuntu-latest
steps
:
-
uses
:
actions/checkout@v4
...
...
@@ -84,7 +84,7 @@ jobs:
WHL_TOKEN
:
${{ secrets.WHL_TOKEN }}
-
name
:
Update wheel index
run
:
python3 scripts/update_kernel_whl_index.py --cuda
12
8
run
:
python3 scripts/update_kernel_whl_index.py --cuda
12
9
-
name
:
Push wheel index
run
:
|
...
...
docker/Dockerfile
View file @
4e7f0252
...
...
@@ -79,14 +79,17 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
python3
-m
pip
install
--no-cache-dir
https://github.com/sgl-project/whl/releases/download/v0.3.3/sgl_kernel-0.3.3-cp39-abi3-manylinux2014_x86_64.whl
--force-reinstall
--no-deps
;
\
fi
# Build and install NVSHMEM + DeepEP
RUN
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
\
&&
git clone https://github.com/deepseek-ai/DeepEP.git
\
&&
cd
DeepEP
&&
git checkout
${
DEEPEP_COMMIT
}
&&
cd
..
\
&&
tar
-xf
nvshmem_src_cuda12-all-all-3.3.9.tar.gz
&&
mv
nvshmem_src nvshmem
\
&&
cd
nvshmem
\
&&
rm
-f
/sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
\
&&
NVSHMEM_SHMEM_SUPPORT
=
0
\
# Download source files
RUN
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
&&
\
git clone https://github.com/deepseek-ai/DeepEP.git
&&
\
cd
DeepEP
&&
git checkout
${
DEEPEP_COMMIT
}
&&
cd
..
&&
\
tar
-xf
nvshmem_src_cuda12-all-all-3.3.9.tar.gz
&&
\
mv
nvshmem_src nvshmem
&&
\
rm
-f
/sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
# Build and install NVSHMEM
RUN
cd
/sgl-workspace/nvshmem
&&
\
NVSHMEM_SHMEM_SUPPORT
=
0
\
NVSHMEM_UCX_SUPPORT
=
0
\
NVSHMEM_USE_NCCL
=
0
\
NVSHMEM_MPI_SUPPORT
=
0
\
...
...
@@ -94,10 +97,12 @@ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/sour
NVSHMEM_PMIX_SUPPORT
=
0
\
NVSHMEM_TIMEOUT_DEVICE_POLLING
=
0
\
NVSHMEM_USE_GDRCOPY
=
1
\
cmake
-S
.
-B
build/
-DCMAKE_INSTALL_PREFIX
=
${
NVSHMEM_DIR
}
-DCMAKE_CUDA_ARCHITECTURES
=
90
\
&&
cmake
--build
build
--target
install
-j
${
CMAKE_BUILD_PARALLEL_LEVEL
}
\
&&
cd
/sgl-workspace/DeepEP
\
&&
NVSHMEM_DIR
=
${
NVSHMEM_DIR
}
pip
install
.
cmake
-S
.
-B
build/
-DCMAKE_INSTALL_PREFIX
=
${
NVSHMEM_DIR
}
-DCMAKE_CUDA_ARCHITECTURES
=
"100;120"
&&
\
cmake
--build
build
--target
install
-j
${
CMAKE_BUILD_PARALLEL_LEVEL
}
# Install DeepEP
RUN
cd
/sgl-workspace/DeepEP
&&
\
NVSHMEM_DIR
=
${
NVSHMEM_DIR
}
pip
install
.
# Python tools
RUN
python3
-m
pip
install
--no-cache-dir
\
...
...
@@ -110,7 +115,8 @@ RUN python3 -m pip install --no-cache-dir \
icdiff
\
uv
\
wheel
\
scikit-build-core
scikit-build-core
\
nixl
# Install development tools and utilities
RUN
apt-get update
&&
apt-get
install
-y
\
...
...
docker/Dockerfile.gb200
View file @
4e7f0252
ARG CUDA_VERSION=12.
8
.1
ARG CUDA_VERSION=12.
9
.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
ARG BUILD_TYPE=blackwell
...
...
@@ -38,7 +38,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
#
---
Install SGLang missing package for blackwell build type
# Install SGLang missing package for blackwell build type
RUN python3 -m pip install openai httpx
# GDRCopy installation
...
...
@@ -60,33 +60,39 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
&& case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac \
&& python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& if [ "$CUDA_VERSION" = "12.
8
.1" ]; then \
&& if [ "$CUDA_VERSION" = "12.
9
.1" ]; then \
python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps ; \
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.
2.7
/sgl_kernel-0.
2.7
+cu12
8
-cp3
9
-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.
3.3
/sgl_kernel-0.
3.3
+cu12
9
-cp3
10
-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
fi
# Build and install NVSHMEM + DeepEP
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
&& git clone https://github.com/fzyzcjy/DeepEP.git \
&& cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. \
&& tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
&& cd nvshmem \
&& rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
&& NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" \
&& cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} \
&& cd /sgl-workspace/DeepEP \
&& NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
# Download source files
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
git clone https://github.com/fzyzcjy/DeepEP.git && \
cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. && \
tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
mv nvshmem_src nvshmem && \
rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
# Build and install NVSHMEM
RUN cd /sgl-workspace/nvshmem && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" && \
cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
# Install DeepEP
RUN cd /sgl-workspace/DeepEP && \
NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
# Python tools
RUN python3 -m pip install --no-cache-dir \
...
...
@@ -106,7 +112,7 @@ RUN python3 -m pip install --no-cache-dir \
nvidia-cudnn-cu12 \
nvidia-cudnn-frontend
#
Allows for FP4 disaggregation
#
Install nixl kv transfer backend
RUN python3 -m pip install --no-cache-dir \
nixl
...
...
@@ -163,6 +169,12 @@ RUN python3 -m pip install --no-cache-dir --break-system-packages \
matplotlib \
tabulate
# Install flashinfer from source to fix a bug
# https://github.com/flashinfer-ai/flashinfer/pull/1413
# FIXME: remove this once flashinfer release > 0.2.10
WORKDIR /sgl-workspace
RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && cd flashinfer && python3 -m pip install -v .
# Install diff-so-fancy
RUN curl -LSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
&& chmod +x /usr/local/bin/diff-so-fancy
...
...
python/sglang/srt/entrypoints/engine.py
View file @
4e7f0252
...
...
@@ -259,7 +259,7 @@ class Engine(EngineBase):
f
"data_parallel_rank must be in range [0,
{
self
.
server_args
.
dp_size
-
1
}
]"
)
logger
.
info
(
f
"data_parallel_rank:
{
data_parallel_rank
}
"
)
logger
.
debug
(
f
"data_parallel_rank:
{
data_parallel_rank
}
"
)
obj
=
GenerateReqInput
(
text
=
prompt
,
input_ids
=
input_ids
,
...
...
sgl-kernel/build.sh
View file @
4e7f0252
...
...
@@ -39,6 +39,13 @@ docker run --rm \
# Install CMake (version >= 3.26) - Robust Installation
export CMAKE_VERSION_MAJOR=3.31
export CMAKE_VERSION_MINOR=1
# Setting these flags to reduce OOM chance only on ARM
if [
\"
${
ARCH
}
\"
=
\"
aarch64
\"
]; then
export CUDA_NVCC_FLAGS=
\"
-Xcudafe --threads=2
\"
export MAKEFLAGS='-j2'
export CMAKE_BUILD_PARALLEL_LEVEL=2
export NINJAFLAGS='-j2'
fi
echo
\"
Downloading CMake from: https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
\"
wget https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
tar -xzf cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
...
...
sgl-kernel/rename_wheels.sh
View file @
4e7f0252
...
...
@@ -7,8 +7,19 @@ wheel_files=($WHEEL_DIR/*.whl)
for
wheel
in
"
${
wheel_files
[@]
}
"
;
do
intermediate_wheel
=
"
${
wheel
/linux/manylinux2014
}
"
if
ls
/usr/local/ |
grep
-q
"12.8"
;
then
new_wheel
=
"
${
intermediate_wheel
/-cp39/+cu128-cp39
}
"
# Extract the current python version from the wheel name
if
[[
$intermediate_wheel
=
~
-cp
([
0-9]+
)
-
]]
;
then
cp_version
=
"
${
BASH_REMATCH
[1]
}
"
else
echo
"Could not extract Python version from wheel name:
$intermediate_wheel
"
continue
fi
# Detect CUDA version and add appropriate suffix
if
ls
/usr/local/ |
grep
-q
"12.9"
;
then
new_wheel
=
"
${
intermediate_wheel
/-cp
${
cp_version
}
/+cu129-cp
${
cp_version
}}
"
elif
ls
/usr/local/ |
grep
-q
"12.8"
;
then
new_wheel
=
"
${
intermediate_wheel
/-cp
${
cp_version
}
/+cu128-cp
${
cp_version
}}
"
else
new_wheel
=
"
$intermediate_wheel
"
fi
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment