Unverified Commit 4e7f0252 authored by ishandhanani, committed by GitHub

chore(gb200): update to CUDA 12.9 and improve build process (#8772)

parent 36bfddec
@@ -11,7 +11,7 @@ jobs:
   publish:
     if: github.repository == 'sgl-project/sglang'
     runs-on: ubuntu-22.04-arm
-    environment: 'prod'
+    environment: "prod"
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
@@ -31,6 +31,6 @@ jobs:
       - name: Build and Push
        run: |
           version=$(cat python/sglang/version.py | cut -d'"' -f2)
-          tag=v${version}-cu128-gb200
-          docker buildx build --platform linux/arm64 --push --output type=image -t lmsysorg/sglang:${tag} -f docker/Dockerfile.gb200 --build-arg CUDA_VERSION=12.8.1 --build-arg BUILD_TYPE=blackwell --no-cache .
+          tag=v${version}-cu129-gb200
+          docker buildx build --platform linux/arm64 --push --output type=image -t lmsysorg/sglang:${tag} -f docker/Dockerfile.gb200 --build-arg CUDA_VERSION=12.9.1 --build-arg BUILD_TYPE=blackwell --no-cache .
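The workflow above now builds `docker/Dockerfile.gb200` with CUDA 12.9.1 and publishes the image under a `cu129` tag. As a rough usage sketch (the version component is a placeholder resolved from `python/sglang/version.py` at build time, and the model path is only an example):

```bash
# Pull the GB200 image published by this workflow and start the server.
# VERSION and MODEL are placeholders, not values taken from this commit.
VERSION=x.y.z
MODEL=/path/to/model
docker pull "lmsysorg/sglang:v${VERSION}-cu129-gb200"
docker run --gpus all --rm -p 30000:30000 "lmsysorg/sglang:v${VERSION}-cu129-gb200" \
    python3 -m sglang.launch_server --model-path "$MODEL" --host 0.0.0.0 --port 30000
```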
@@ -17,17 +17,17 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  build-cu128-aarch64:
+  build-cu129-aarch64:
     if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node
+    runs-on: sgl-kernel-release-node-arm
     strategy:
       matrix:
-        python-version: ['3.9']
-        cuda-version: ['12.8']
+        python-version: ["3.10"]
+        cuda-version: ["12.9"]
     steps:
       - uses: actions/checkout@v4
         with:
-          submodules: 'recursive'
+          submodules: "recursive"
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
@@ -47,7 +47,7 @@ jobs:
           path: sgl-kernel/dist/*
 
   release:
-    needs: build-cu128-aarch64
+    needs: build-cu129-aarch64
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -84,7 +84,7 @@ jobs:
           WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
       - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py --cuda 128
+        run: python3 scripts/update_kernel_whl_index.py --cuda 129
       - name: Push wheel index
         run: |
...
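This workflow now builds the aarch64 `sgl-kernel` wheel against CUDA 12.9 and Python 3.10 and records it in the wheel index with `--cuda 129`. For illustration, installing such a wheel from the `sgl-project/whl` releases would look like the following; the file name follows the `+cu129`/`cp310` pattern used in the Dockerfiles below, but the exact release tag and version are assumptions:

```bash
# Hypothetical example of installing a cu129 aarch64 sgl-kernel wheel; adjust
# the release tag, version, and architecture to whatever was actually published.
pip install "https://github.com/sgl-project/whl/releases/download/v0.3.3/sgl_kernel-0.3.3+cu129-cp310-abi3-manylinux2014_aarch64.whl"
```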
@@ -79,14 +79,17 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
         python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.3/sgl_kernel-0.3.3-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
     fi
 
-# Build and install NVSHMEM + DeepEP
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
-    && git clone https://github.com/deepseek-ai/DeepEP.git \
-    && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. \
-    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
-    && cd nvshmem \
-    && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
-    && NVSHMEM_SHMEM_SUPPORT=0 \
+# Download source files
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
+    git clone https://github.com/deepseek-ai/DeepEP.git && \
+    cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. && \
+    tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
+    mv nvshmem_src nvshmem && \
+    rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
+
+# Build and install NVSHMEM
+RUN cd /sgl-workspace/nvshmem && \
+    NVSHMEM_SHMEM_SUPPORT=0 \
     NVSHMEM_UCX_SUPPORT=0 \
     NVSHMEM_USE_NCCL=0 \
     NVSHMEM_MPI_SUPPORT=0 \
@@ -94,10 +97,12 @@ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/sour
     NVSHMEM_PMIX_SUPPORT=0 \
     NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
     NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=90 \
-    && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} \
-    && cd /sgl-workspace/DeepEP \
-    && NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" && \
+    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
+
+# Install DeepEP
+RUN cd /sgl-workspace/DeepEP && \
+    NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
 
 # Python tools
 RUN python3 -m pip install --no-cache-dir \
@@ -110,7 +115,8 @@ RUN python3 -m pip install --no-cache-dir \
     icdiff \
     uv \
     wheel \
-    scikit-build-core
+    scikit-build-core \
+    nixl
 
 # Install development tools and utilities
 RUN apt-get update && apt-get install -y \
...
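In the Dockerfile hunks above, the single monolithic `RUN` that downloaded, built, and installed NVSHMEM and DeepEP is split into separate cacheable layers, and the NVSHMEM build now targets `CMAKE_CUDA_ARCHITECTURES="100;120"` (SM 10.0 and SM 12.0, i.e. Blackwell) instead of 90 (Hopper). A quick sketch for checking that the target node reports a matching compute capability (assumes a driver recent enough to support the `compute_cap` query field):

```bash
# Print GPU name and compute capability; expect 10.0 or 12.0 on Blackwell parts
# covered by the "100;120" architecture list used in the NVSHMEM build above.
nvidia-smi --query-gpu=name,compute_cap --format=csv
```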
-ARG CUDA_VERSION=12.8.1
+ARG CUDA_VERSION=12.9.1
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
 ARG BUILD_TYPE=blackwell
@@ -38,7 +38,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean
 
-# --- Install SGLang missing package for blackwell build type
+# Install SGLang missing package for blackwell build type
 RUN python3 -m pip install openai httpx
 
 # GDRCopy installation
@@ -60,33 +60,39 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
     && case "$CUDA_VERSION" in \
         12.6.1) CUINDEX=126 ;; \
         12.8.1) CUINDEX=128 ;; \
+        12.9.1) CUINDEX=129 ;; \
         *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
     esac \
     && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
-    && if [ "$CUDA_VERSION" = "12.8.1" ]; then \
+    && if [ "$CUDA_VERSION" = "12.9.1" ]; then \
         python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps ; \
-        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.2.7/sgl_kernel-0.2.7+cu128-cp39-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
+        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.3/sgl_kernel-0.3.3+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
     fi
 
-# Build and install NVSHMEM + DeepEP
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
-    && git clone https://github.com/fzyzcjy/DeepEP.git \
-    && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. \
-    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
-    && cd nvshmem \
-    && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
-    && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" \
-    && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} \
-    && cd /sgl-workspace/DeepEP \
-    && NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
+# Download source files
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
+    git clone https://github.com/fzyzcjy/DeepEP.git && \
+    cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. && \
+    tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
+    mv nvshmem_src nvshmem && \
+    rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
+
+# Build and install NVSHMEM
+RUN cd /sgl-workspace/nvshmem && \
+    NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" && \
+    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
+
+# Install DeepEP
+RUN cd /sgl-workspace/DeepEP && \
+    NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
 
 # Python tools
 RUN python3 -m pip install --no-cache-dir \
@@ -106,7 +112,7 @@ RUN python3 -m pip install --no-cache-dir \
     nvidia-cudnn-cu12 \
     nvidia-cudnn-frontend
 
-# Allows for FP4 disaggregation
+# Install nixl kv transfer backend
 RUN python3 -m pip install --no-cache-dir \
     nixl
@@ -163,6 +169,12 @@ RUN python3 -m pip install --no-cache-dir --break-system-packages \
     matplotlib \
     tabulate
 
+# Install flashinfer from source to fix a bug
+# https://github.com/flashinfer-ai/flashinfer/pull/1413
+# FIXME: remove this once flashinfer release > 0.2.10
+WORKDIR /sgl-workspace
+RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && cd flashinfer && python3 -m pip install -v .
+
 # Install diff-so-fancy
 RUN curl -LSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
     && chmod +x /usr/local/bin/diff-so-fancy
...
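The GB200 Dockerfile hunks above bump the base image to CUDA 12.9.1, add the matching `cu129` PyTorch index and `sgl_kernel` wheel, split the NVSHMEM/DeepEP build into separate layers, and temporarily build flashinfer from source until a release newer than 0.2.10 is available. A post-build sanity check could look like this sketch; the image tag is a placeholder and the module names are taken from the upstream projects:

```bash
IMAGE="lmsysorg/sglang:vX.Y.Z-cu129-gb200"   # placeholder; use the tag you built or pulled
# Confirm the source-built flashinfer and sgl_kernel import inside the image.
docker run --rm --gpus all "$IMAGE" \
    python3 -c "import flashinfer, sgl_kernel; print(flashinfer.__version__)"
# Confirm the pinned NCCL build is present.
docker run --rm "$IMAGE" python3 -m pip show nvidia-nccl-cu12
```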
@@ -259,7 +259,7 @@ class Engine(EngineBase):
                 f"data_parallel_rank must be in range [0, {self.server_args.dp_size-1}]"
             )
 
-        logger.info(f"data_parallel_rank: {data_parallel_rank}")
+        logger.debug(f"data_parallel_rank: {data_parallel_rank}")
         obj = GenerateReqInput(
             text=prompt,
             input_ids=input_ids,
...
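The engine change above drops the `data_parallel_rank` message from info to debug level, so it no longer appears with default logging. A hedged sketch of how to surface it again when launching the server (assuming the standard `--log-level` server argument):

```bash
# Raise log verbosity so debug-level messages such as "data_parallel_rank: N"
# are printed; MODEL is a placeholder.
MODEL=/path/to/model
python3 -m sglang.launch_server --model-path "$MODEL" --log-level debug
```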
@@ -39,6 +39,13 @@ docker run --rm \
    # Install CMake (version >= 3.26) - Robust Installation
    export CMAKE_VERSION_MAJOR=3.31
    export CMAKE_VERSION_MINOR=1
+   # Setting these flags to reduce OOM chance only on ARM
+   if [ \"${ARCH}\" = \"aarch64\" ]; then
+      export CUDA_NVCC_FLAGS=\"-Xcudafe --threads=2\"
+      export MAKEFLAGS='-j2'
+      export CMAKE_BUILD_PARALLEL_LEVEL=2
+      export NINJAFLAGS='-j2'
+   fi
    echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\"
    wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz
    tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz
...
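The added block caps build parallelism on aarch64 because several concurrent `nvcc` jobs can exhaust memory on the ARM runner. The same pattern can be applied outside this wrapper script; a sketch (the job counts are judgment calls, not values mandated elsewhere in the repo):

```bash
# Cap parallel build jobs on ARM hosts to reduce peak memory usage.
if [ "$(uname -m)" = "aarch64" ]; then
    export MAKEFLAGS='-j2'                # read by GNU make
    export CMAKE_BUILD_PARALLEL_LEVEL=2   # read by `cmake --build`
    export NINJAFLAGS='-j2'               # read by some build wrappers that invoke ninja
fi
```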
@@ -7,8 +7,19 @@ wheel_files=($WHEEL_DIR/*.whl)
 
 for wheel in "${wheel_files[@]}"; do
     intermediate_wheel="${wheel/linux/manylinux2014}"
-    if ls /usr/local/ | grep -q "12.8"; then
-        new_wheel="${intermediate_wheel/-cp39/+cu128-cp39}"
+    # Extract the current python version from the wheel name
+    if [[ $intermediate_wheel =~ -cp([0-9]+)- ]]; then
+        cp_version="${BASH_REMATCH[1]}"
+    else
+        echo "Could not extract Python version from wheel name: $intermediate_wheel"
+        continue
+    fi
+
+    # Detect CUDA version and add appropriate suffix
+    if ls /usr/local/ | grep -q "12.9"; then
+        new_wheel="${intermediate_wheel/-cp${cp_version}/+cu129-cp${cp_version}}"
+    elif ls /usr/local/ | grep -q "12.8"; then
+        new_wheel="${intermediate_wheel/-cp${cp_version}/+cu128-cp${cp_version}}"
     else
         new_wheel="$intermediate_wheel"
     fi
...
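To make the new renaming logic concrete, here is a self-contained sketch run on a hypothetical wheel file name; it exercises the same parameter expansion and regex as the script above:

```bash
# Worked example on a made-up file name (no real wheel required).
wheel="sgl_kernel-0.3.3-cp310-abi3-linux_aarch64.whl"
intermediate_wheel="${wheel/linux/manylinux2014}"            # fix platform tag
if [[ $intermediate_wheel =~ -cp([0-9]+)- ]]; then
    cp_version="${BASH_REMATCH[1]}"                          # -> 310
fi
new_wheel="${intermediate_wheel/-cp${cp_version}/+cu129-cp${cp_version}}"
echo "$new_wheel"
# -> sgl_kernel-0.3.3+cu129-cp310-abi3-manylinux2014_aarch64.whl
```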