Unverified Commit 285a8e69 authored by ishandhanani, committed by GitHub

docker: add CUDA 13 support in Dockerfile and update GDRCopy/NVSHMEM for Blackwell support (#11517)


Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
parent 813bd6f8
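For context on what this commit produces: once the release workflow below has been dispatched and the manifests published, the multi-arch CUDA 13 dev image should be consumable roughly as sketched here. This is a usage sketch, not part of the commit; the tag name comes from the workflow's manifest step, and availability depends on the release actually having run.

```bash
# Pull the multi-arch CUDA 13 dev image the workflow publishes (tag from the workflow below).
docker pull lmsysorg/sglang:dev-cu13

# Quick smoke test: confirm the torch build inside matches CUDA 13.
# --gpus all assumes the NVIDIA Container Toolkit is installed on the host.
docker run --gpus all --rm lmsysorg/sglang:dev-cu13 \
  python3 -c "import torch; print(torch.__version__, torch.version.cuda)"
```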
name: Build and Push CUDA 13 Docker Images
# release this manually via workflow_dispatch for now
on:
  workflow_dispatch:

jobs:
  build-dev:
    if: ${{ github.repository == 'sgl-project/sglang' }}
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        include:
          - runner: x64-docker-build-node
            platform: linux/amd64
            build_type: all
            grace_blackwell: 0
            tag: dev-x86-cu13
            version: 13.0.1
          - runner: arm-docker-build-node
            platform: linux/arm64
            build_type: all
            grace_blackwell: 1
            tag: dev-arm64-cu13
            version: 13.0.1
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: true
          docker-images: true
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push Dev Image
        run: |
          docker buildx build \
            --platform ${{ matrix.platform }} \
            --push \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.version }} \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \
            --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} \
            -t lmsysorg/sglang:${{ matrix.tag }} \
            --no-cache \
            .

  create-manifests:
    runs-on: ubuntu-22.04
    needs: [build-dev]
    if: ${{ github.repository == 'sgl-project/sglang' }}
    strategy:
      matrix:
        variant:
          - tag: dev-cu13
            x86_tag: dev-x86-cu13
            arm64_tag: dev-arm64-cu13
    steps:
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - run: |
          docker buildx imagetools create \
            -t lmsysorg/sglang:${{ matrix.variant.tag }} \
            -t lmsysorg/sglang:nightly-${{ matrix.variant.tag }}-$(date +%Y%m%d)-${GITHUB_SHA:0:8} \
            lmsysorg/sglang:${{ matrix.variant.x86_tag }} \
            lmsysorg/sglang:${{ matrix.variant.arm64_tag }}
      - name: Cleanup Old Nightly Builds
        run: |
          # Get a JWT token for the Docker Hub API
          TOKEN=$(curl -s -H "Content-Type: application/json" -X POST -d '{"username": "${{ secrets.DOCKERHUB_USERNAME }}", "password": "${{ secrets.DOCKERHUB_TOKEN }}"}' https://hub.docker.com/v2/users/login/ | jq -r .token)
          # Get all tags for the repository
          TAGS_RESPONSE=$(curl -s -H "Authorization: JWT $TOKEN" "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/?page_size=100")
          # Extract tags that match our pattern and sort by last_updated timestamp (most recent first)
          TAGS=$(echo "$TAGS_RESPONSE" | jq -r '.results[] | select(.name | startswith("nightly-${{ matrix.variant.tag }}-")) | "\(.last_updated)|\(.name)"' | sort -r | cut -d'|' -f2)
          # Count total tags and keep only the 14 most recent
          TAG_COUNT=$(echo "$TAGS" | wc -l)
          if [ "$TAG_COUNT" -gt 14 ]; then
            echo "Found $TAG_COUNT nightly builds, keeping only the 14 most recent"
            TAGS_TO_DELETE=$(echo "$TAGS" | tail -n +15)
            echo "Tags to delete: $TAGS_TO_DELETE"
            # Delete old tags
            for tag in $TAGS_TO_DELETE; do
              echo "Deleting tag: $tag"
              curl -X DELETE \
                -H "Authorization: JWT $TOKEN" \
                "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/$tag/"
            done
          else
            echo "Only $TAG_COUNT nightly builds found, no cleanup needed"
          fi
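Since the workflow is gated on workflow_dispatch, it has to be launched by hand. A minimal sketch with the GitHub CLI follows; the workflow filename here is an assumption (it is not visible in this diff), so substitute the real path under .github/workflows/.

```bash
# Dispatch the CUDA 13 release workflow manually (filename is hypothetical).
gh workflow run release-docker-cu13.yml --repo sgl-project/sglang

# List recent runs of that workflow to follow progress.
gh run list --repo sgl-project/sglang --workflow=release-docker-cu13.yml --limit 5
```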
@@ -53,7 +53,17 @@ jobs:
       - name: Build and Push Dev Image
         run: |
-          docker buildx build --platform ${{ matrix.platform }} --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.tag }} --no-cache .
+          docker buildx build \
+            --platform ${{ matrix.platform }} \
+            --push \
+            -f docker/Dockerfile \
+            --build-arg CUDA_VERSION=${{ matrix.version }} \
+            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
+            --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \
+            --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} \
+            -t lmsysorg/sglang:${{ matrix.tag }} \
+            --no-cache \
+            .

   create-manifests:
     runs-on: ubuntu-22.04
......
 ARG CUDA_VERSION=12.9.1
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base

-ARG TARGETARCH
-ARG GRACE_BLACKWELL=0
+ARG TARGETARCH
 ARG BUILD_TYPE=all
 ARG BRANCH_TYPE=remote
+ARG GRACE_BLACKWELL=0
+ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2
 ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
 ARG FLASHMLA_COMMIT=1408756a88e52a25196b759eaf8db89d2b51b5a1
+ARG FAST_HADAMARD_TRANSFORM_COMMIT=7fd811c2b47f63b0b08d2582619f939e14dad77c
-ARG CMAKE_BUILD_PARALLEL_LEVEL=2
+ARG TRITON_LANG_COMMIT=4caa0328bf8df64896dd5f6fb9df41b0eb2e750a
 ARG SGL_KERNEL_VERSION=0.3.16.post4
+ARG GDRCOPY_VERSION=2.5.1
+ARG NVSHMEM_VERSION=3.4.5

 ENV DEBIAN_FRONTEND=noninteractive \
     CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
+    GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/ \
     NVSHMEM_DIR=/sgl-workspace/nvshmem/install

 # Add GKE default lib and bin locations.
 ENV PATH="${PATH}:/usr/local/nvidia/bin" \
@@ -55,7 +61,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 # GDRCopy installation
 RUN mkdir -p /tmp/gdrcopy && cd /tmp \
-    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v${GDRCOPY_VERSION} \
     && cd gdrcopy/packages \
     && CUDA=/usr/local/cuda ./build-deb-packages.sh \
     && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
@@ -69,6 +75,7 @@ COPY . /src
 FROM base AS build-image

 # Install SGLang
+# Until torch 2.9 and cu13 are stable we manually update torch if you are on CUDA 13
 WORKDIR /sgl-workspace
 ARG BRANCH_TYPE
 COPY --from=local_src /src /tmp/local_src
@@ -84,36 +91,64 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
         12.6.1) CUINDEX=126 ;; \
         12.8.1) CUINDEX=128 ;; \
         12.9.1) CUINDEX=129 ;; \
+        13.0.1) CUINDEX=130 ;; \
         *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
     esac \
-    && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
-        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
-    fi \
-    && if [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
-        python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} ; \
+    && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
+        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
+        ; \
+    elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
+        python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} \
+        ; \
+    elif [ "$CUDA_VERSION" = "13.0.1" ]; then \
+        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
+        ; \
+    else \
+        echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 \
+        ; \
     fi \
     && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
-    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
+    && if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
+        python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
+    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+        python3 -m pip install --no-cache-dir nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+        python3 -m pip uninstall -y torch torchaudio torchvision ; \
+        python3 -m pip install --no-cache-dir torch==2.9.0 torchaudio==2.9.0 torchvision --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} ; \
+    else \
+        echo "No NCCL mapping for CUDA_VERSION=${CUDA_VERSION}" && exit 1 ; \
+    fi \
     && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin

 # Download NVSHMEM source files
 # We use Tom's DeepEP fork for GB200 for now; the 1fd57b0276311d035d16176bb0076426166e52f3 commit is https://github.com/fzyzcjy/DeepEP/tree/gb200_blog_part_2
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
-    if [ "$GRACE_BLACKWELL" = "1" ]; then \
-        git clone https://github.com/fzyzcjy/DeepEP.git \
-        && cd DeepEP && git checkout 1fd57b0276311d035d16176bb0076426166e52f3 && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
-    else \
-        git clone https://github.com/deepseek-ai/DeepEP.git \
-        && cd DeepEP && git checkout ${DEEPEP_COMMIT} && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
-    fi \
-    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
-    && mv nvshmem_src nvshmem \
-    && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
+RUN set -eux; \
+    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+        wget "https://github.com/NVIDIA/nvshmem/releases/download/v${NVSHMEM_VERSION}-0/nvshmem_src_cuda-all-all-${NVSHMEM_VERSION}.tar.gz"; \
+        NVSHMEM_TARBALL="nvshmem_src_cuda-all-all-${NVSHMEM_VERSION}.tar.gz"; \
+    else \
+        wget "https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz"; \
+        NVSHMEM_TARBALL="nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz"; \
+    fi && \
+    if [ "$GRACE_BLACKWELL" = "1" ]; then \
+        git clone https://github.com/fzyzcjy/DeepEP.git && \
+        cd DeepEP && \
+        git checkout ${GRACE_BLACKWELL_DEEPEP_BRANCH} && \
+        sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && \
+        cd .. ; \
+    else \
+        git clone https://github.com/deepseek-ai/DeepEP.git && \
+        cd DeepEP && \
+        git checkout "${DEEPEP_COMMIT}" && \
+        sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && \
+        cd .. ; \
    fi && \
+    tar -xf "${NVSHMEM_TARBALL}" && \
+    mv nvshmem_src nvshmem && \
+    rm -f "/sgl-workspace/${NVSHMEM_TARBALL}"

 # Build and install NVSHMEM
 RUN cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
+    if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;103;120"; else CUDA_ARCH="90"; fi && \
     NVSHMEM_SHMEM_SUPPORT=0 \
     NVSHMEM_UCX_SUPPORT=0 \
     NVSHMEM_USE_NCCL=0 \
@@ -126,29 +161,50 @@ RUN cd /sgl-workspace/nvshmem && \
     cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}

 # Install DeepEP
+# CTK13 requires the cccl include
 RUN cd /sgl-workspace/DeepEP && \
     case "$CUDA_VERSION" in \
         12.6.1) \
             CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' \
             ;; \
-        12.8.1|12.9.1) \
-            CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' \
+        12.8.1|12.9.1|13.0.1) \
+            CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0;10.3' \
             ;; \
         *) \
             echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 \
             ;; \
     esac && \
+    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+        sed -i "/^ include_dirs = \['csrc\/'\]/a\ include_dirs.append('${CUDA_HOME}/include/cccl')" setup.py; \
+    fi && \
     NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" pip install --no-build-isolation .

 # Install flashmla
-RUN git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla && \
+RUN if [ "$CUDA_VERSION" != "13.0.1" ]; then \
+    git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla && \
     cd flash-mla && \
     git checkout ${FLASHMLA_COMMIT} && \
     git submodule update --init --recursive && \
     if [ "$CUDA_VERSION" = "12.6.1" ]; then \
         export FLASH_MLA_DISABLE_SM100=1; \
     fi && \
-    pip install --no-build-isolation -v . ;
+    pip install --no-build-isolation -v . ; \
+    fi

+# In order to use flashinfer_cutedsl without IMA for WideEP configs we must install
+# latest flashinfer_cutedsl. Once 0.4.3 is officially released, remove this
+RUN python3 -m pip install --no-cache-dir --upgrade --pre "nvidia-cutlass-dsl==4.3.0.dev0"

+# For cuda 13, we install triton from source to fix some sm103 issues
+# This can be reverted after >3.4.5 is released
+# See the conversation in: https://github.com/triton-lang/triton/pull/8536
+RUN if [ "$CUDA_VERSION" = "13.0.1" ]; then \
+    git clone https://github.com/triton-lang/triton.git && \
+    cd triton && \
+    git checkout ${TRITON_LANG_COMMIT} && \
+    pip install --break-system-packages -r python/requirements.txt && \
+    MAX_JOBS=20 pip install --break-system-packages -e .; \
+    fi

 # Python tools
 RUN python3 -m pip install --no-cache-dir \
......
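To reproduce the CI image locally, the Dockerfile above can be built with the same build args the workflow passes. A sketch for an x86 CUDA 13 build; set GRACE_BLACKWELL=1 only for the GB200/aarch64 variant, matching the workflow matrix. The local tag name is just an example.

```bash
# Local equivalent of the CI build for the x86 CUDA 13 image.
docker build \
  -f docker/Dockerfile \
  --build-arg CUDA_VERSION=13.0.1 \
  --build-arg BUILD_TYPE=all \
  --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \
  --build-arg GRACE_BLACKWELL=0 \
  -t sglang:dev-cu13-local \
  .
```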
@@ -12,10 +12,11 @@ It is recommended to use uv for faster installation:

 ```bash
 pip install --upgrade pip
 pip install uv
-uv pip install sglang --prerelease=allow
+uv pip install "sglang" --prerelease=allow
 ```

 **Quick fixes to common problems**

 - If you encounter `OSError: CUDA_HOME environment variable is not set`. Please set it to your CUDA install root with either of the following solutions:
   1. Use `export CUDA_HOME=/usr/local/cuda-<your-cuda-version>` to set the `CUDA_HOME` environment variable.
   2. Install FlashInfer first following [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html), then install SGLang as described above.
@@ -33,6 +34,7 @@ pip install -e "python"
 ```

 **Quick fixes to common problems**

 - If you want to develop SGLang, it is recommended to use docker. Please refer to [setup docker container](../developer_guide/development_guide_using_docker.md#setup-docker-container). The docker image is `lmsysorg/sglang:dev`.

 ## Method 3: Using docker
......
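As a companion to the CUDA_HOME fix described in the doc above, a quick sanity check that the variable points at a real toolkit and agrees with the installed torch wheel; the path is an example, adjust to your install.

```bash
# Point CUDA_HOME at the toolkit root (example path) and verify both sides agree.
export CUDA_HOME=/usr/local/cuda-13.0
"$CUDA_HOME/bin/nvcc" --version                        # toolkit version on disk
python3 -c "import torch; print(torch.version.cuda)"   # CUDA version the torch wheel was built for
```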
@@ -60,11 +60,11 @@ dependencies = [
     "soundfile==0.13.1",
     "tiktoken",
     "timm==1.0.16",
-    "torch==2.8.0",
     "torch_memory_saver==0.0.9",
-    "torchao==0.9.0",
+    "torch==2.8.0",
     "torchaudio==2.8.0",
     "torchvision",
+    "torchao==0.9.0",
     "tqdm",
     "transformers==4.57.1",
     "uvicorn",
@@ -77,7 +77,7 @@ dependencies = [
 ]

 [project.optional-dependencies]
-modelopt = ["nvidia-modelopt"]
+checkpoint-engine = ["checkpoint-engine==0.1.2"]
 test = [
     "accelerate",
     "expecttest",
@@ -89,21 +89,6 @@ test = [
     "sentence_transformers",
     "tabulate",
 ]
-checkpoint-engine = ["checkpoint-engine==0.1.2"]
-all = []
-dev = ["sglang[test]"]
-
-# Temporary tags
-cu130 = [
-    "torch==2.9.0",
-    "torchaudio==2.9.0",
-    "torchvision==0.24.0",
-]
-cu130_all = [
-    "sglang[test]",
-    "sglang[decord]",
-    "sglang[cu130]"
-]
 tracing = [
     "opentelemetry-api",
     "opentelemetry-exporter-otlp",
@@ -111,10 +96,6 @@ tracing = [
     "opentelemetry-sdk",
 ]

-# To be deprecated in 2 weeks
-blackwell = ["sglang[dev]"]
-blackwell_aarch64 = ["sglang[dev]"]
-
 [project.urls]
 "Homepage" = "https://github.com/sgl-project/sglang"
 "Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
......
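The removed cu130/cu130_all extras are superseded by the Dockerfile's explicit torch upgrade on CUDA 13. For reference, the manual equivalent of the dropped `sglang[cu130]` extra, using the same pins the Dockerfile diff above installs:

```bash
# Manual replacement for the removed `sglang[cu130]` extra (pins taken from the Dockerfile diff).
pip install torch==2.9.0 torchaudio==2.9.0 torchvision \
  --extra-index-url https://download.pytorch.org/whl/cu130
```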
@@ -4,7 +4,7 @@ set -euxo pipefail

 bash scripts/ci/ci_install_dependency.sh

-export GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+export GDRCOPY_HOME=/usr/src/gdrdrv-2.5.1/
 export NVSHMEM_DIR=/opt/nvshmem/install
 export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH"
 export PATH="${NVSHMEM_DIR}/bin:$PATH"
@@ -27,9 +27,9 @@ rm -rf /opt/gdrcopy && mkdir -p /opt/gdrcopy
 rm -rf /opt/nvshmem && mkdir -p /opt/nvshmem
 cd /opt/gdrcopy
 git clone https://github.com/NVIDIA/gdrcopy.git .
-git checkout v2.4.4
+git checkout v2.5.1
 apt update
-apt install -y nvidia-dkms-535
+apt install -y nvidia-dkms-580
 apt install -y build-essential devscripts debhelper fakeroot pkg-config dkms
 apt install -y check libsubunit0 libsubunit-dev python3-venv
 cd packages
@@ -46,8 +46,8 @@ apt-get update && apt-get install -y libfabric-dev

 # Install NVSHMEM
 cd /opt/nvshmem
-wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
-tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz
+wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.4.5/source/nvshmem_src_cuda12-all-all-3.4.5.tar.gz
+tar -xf nvshmem_src_cuda12-all-all-3.4.5.tar.gz
 mv nvshmem_src nvshmem && cd nvshmem
 NVSHMEM_SHMEM_SUPPORT=0 \
 NVSHMEM_UCX_SUPPORT=0 \
@@ -57,7 +57,7 @@ NVSHMEM_IBGDA_SUPPORT=1 \
 NVSHMEM_PMIX_SUPPORT=0 \
 NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
 NVSHMEM_USE_GDRCOPY=1 \
-cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90
+cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES="90;100;103;121"
 cd build
 make -j$(nproc) install
......
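After the GDRCopy 2.5.1 / NVSHMEM 3.4.5 upgrade above, one hedged way to verify both installs on a CI node; the binary names assume the gdrcopy test package and NVSHMEM's bundled tooling are laid out as this script installs them.

```bash
# Sanity-check GDRCopy: gdrcopy_sanity ships with the gdrcopy-tests package
# and exercises the gdrdrv kernel module end to end (assumes the module is loaded).
gdrcopy_sanity

# Inspect the NVSHMEM build that was just installed (feature flags, CUDA arch list).
/opt/nvshmem/install/bin/nvshmem-info -a | head -n 20
```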
@@ -55,7 +55,7 @@ else
     $PIP_CMD install flashinfer-python==0.4.1 --prerelease=allow $PIP_INSTALL_SUFFIX

     # Install the main package
-    $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX --upgrade
+    $PIP_CMD install -e "python" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX --upgrade
 fi

 # Install router for pd-disagg test
@@ -68,7 +68,7 @@ echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNE
 if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
     ls -alh sgl-kernel/dist
-    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${CU_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
 else
     $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
 fi
......
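The wheel path change above appends the CUDA local version tag, so a custom-built sgl-kernel wheel is matched per CUDA variant. A worked example of how the filename resolves, using values visible elsewhere in this commit (shown for illustration only):

```bash
# With these example values, the install path resolves as shown below.
SGL_KERNEL_VERSION_FROM_KERNEL=0.3.16.post4
CU_VERSION=cu130
echo "sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${CU_VERSION}-cp310-abi3-manylinux2014_x86_64.whl"
# -> sgl-kernel/dist/sgl_kernel-0.3.16.post4+cu130-cp310-abi3-manylinux2014_x86_64.whl
```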