Unverified commit 0166403c, authored by fzyzcjy and committed by GitHub
Browse files

Support Blackwell DeepEP docker images (#6868)

parent bcf66ef3
...@@ -9,6 +9,17 @@ jobs: ...@@ -9,6 +9,17 @@ jobs:
build-dev: build-dev:
if: ${{ github.repository == 'sgl-project/sglang' }} if: ${{ github.repository == 'sgl-project/sglang' }}
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
strategy:
matrix:
variant:
- base: lmsysorg/sglang:latest
tag: deepep
- base: lmsysorg/sglang:dev
tag: dev-deepep
- base: lmsysorg/sglang:blackwell
tag: blackwell-deepep
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v4
...@@ -30,7 +41,7 @@ jobs: ...@@ -30,7 +41,7 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and Push DeepEP Image - name: Build and Push Docker Image
run: | run: |
docker build . -f docker/Dockerfile.deepep -t lmsysorg/sglang:deepep --no-cache docker build . -f docker/Dockerfile.deepep --build-arg BASE_IMAGE=${{ matrix.variant.base }} -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache
docker push lmsysorg/sglang:deepep docker push lmsysorg/sglang:${{ matrix.variant.tag }}
# Nightly (and manually triggerable) build of the dev-deepep image.
# Builds docker/Dockerfile.dev-deepep and pushes it as lmsysorg/sglang:dev-deepep.
name: Build Dev-DeepEP Docker Image

on:
  workflow_dispatch:
  schedule:
    # Every day at 00:00 UTC.
    - cron: '0 0 * * *'

jobs:
  build-dev:
    # Only run in the upstream repository; forks have no Docker Hub secrets.
    if: ${{ github.repository == 'sgl-project/sglang' }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Reclaim runner disk space before the image build.
      # Pinned to a release tag instead of the mutable `main` branch because
      # this job has access to the Docker Hub credentials.
      - name: Free disk space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push DeepEP Image
        run: |
          docker build . -f docker/Dockerfile.dev-deepep -t lmsysorg/sglang:dev-deepep --no-cache
          docker push lmsysorg/sglang:dev-deepep
FROM lmsysorg/sglang:latest ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# CMake # CMake
RUN apt-get update \ RUN apt-get update \
...@@ -55,6 +56,9 @@ RUN tar -xf nvshmem_src_3.2.5-1.txz \ ...@@ -55,6 +56,9 @@ RUN tar -xf nvshmem_src_3.2.5-1.txz \
WORKDIR /sgl-workspace/nvshmem WORKDIR /sgl-workspace/nvshmem
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch
RUN sed -i '1i#include <unistd.h>' /sgl-workspace/nvshmem/examples/moe_shuffle.cu && \
cat /sgl-workspace/nvshmem/examples/moe_shuffle.cu
WORKDIR /sgl-workspace/nvshmem WORKDIR /sgl-workspace/nvshmem
ENV CUDA_HOME=/usr/local/cuda ENV CUDA_HOME=/usr/local/cuda
RUN NVSHMEM_SHMEM_SUPPORT=0 \ RUN NVSHMEM_SHMEM_SUPPORT=0 \
...@@ -71,7 +75,7 @@ RUN NVSHMEM_SHMEM_SUPPORT=0 \ ...@@ -71,7 +75,7 @@ RUN NVSHMEM_SHMEM_SUPPORT=0 \
WORKDIR /sgl-workspace/DeepEP WORKDIR /sgl-workspace/DeepEP
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install . RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install --break-system-packages .
# Set workspace # Set workspace
WORKDIR /sgl-workspace WORKDIR /sgl-workspace
...@@ -224,5 +224,8 @@ setopt HIST_FIND_NO_DUPS ...@@ -224,5 +224,8 @@ setopt HIST_FIND_NO_DUPS
setopt INC_APPEND_HISTORY setopt INC_APPEND_HISTORY
EOF EOF
RUN set -euxo ; \
curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin
# Set workspace directory # Set workspace directory
WORKDIR /sgl-workspace/sglang WORKDIR /sgl-workspace/sglang
FROM lmsysorg/sglang:dev

# CMake
# Install the build toolchain, then CMake 3.27.4 from the upstream
# self-extracting installer into /usr/local. The apt package index and the
# installer script are removed in the same layer so they do not persist in
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        wget \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/* \
    && wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \
    && chmod +x cmake-3.27.4-linux-x86_64.sh \
    && ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \
    && rm cmake-3.27.4-linux-x86_64.sh
# Python
# Install python3 and pip, and make `python` resolve to python3.
# `ln -sf` keeps the step from failing when /usr/bin/python already exists
# in the base image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python
# GDRCopy
# Build the GDRCopy v2.4.4 Debian packages from source and install them.
WORKDIR /tmp
# Clone directly at the release tag instead of cloning and checking out in
# separate layers.
RUN git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git

# Packaging prerequisites. The index refresh and the install share one layer
# so a cached, stale index is never paired with a fresh install.
RUN apt-get update \
    && apt-get install -y \
        nvidia-dkms-535 \
        build-essential \
        devscripts \
        debhelper \
        fakeroot \
        pkg-config \
        dkms \
        check \
        libsubunit0 \
        libsubunit-dev

WORKDIR /tmp/gdrcopy/packages
RUN CUDA=/usr/local/cuda ./build-deb-packages.sh

# Install the freshly built packages, in the same order as before.
RUN dpkg -i gdrdrv-dkms_*.deb \
    && dpkg -i libgdrapi_*.deb \
    && dpkg -i gdrcopy-tests_*.deb \
    && dpkg -i gdrcopy_*.deb

ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
# IBGDA dependency
# Provide the unversioned libmlx5.so name alongside libmlx5.so.1
# (-f makes the step idempotent), and install the libfabric headers.
# The package index is refreshed and cleaned within this layer so the install
# does not depend on an index left behind by an earlier layer.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
RUN apt-get update \
    && apt-get install -y libfabric-dev \
    && rm -rf /var/lib/apt/lists/*
# DeepEP
# Fetch the DeepEP sources into /sgl-workspace/DeepEP.
WORKDIR /sgl-workspace
RUN git clone https://github.com/deepseek-ai/DeepEP.git /sgl-workspace/DeepEP
# NVSHMEM
# Build NVSHMEM 3.2.5 from source with the DeepEP patch applied and install it
# under /sgl-workspace/nvshmem/install.

# CUDA architecture(s) to compile NVSHMEM for. Defaults to 90; override with
# `--build-arg NVSHMEM_CUDA_ARCHITECTURES=<arch>` to target other GPUs.
ARG NVSHMEM_CUDA_ARCHITECTURES=90

WORKDIR /sgl-workspace
# Download, unpack and delete the source tarball in one layer so the archive
# is not kept in the image.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
    && tar -xf nvshmem_src_3.2.5-1.txz \
    && mv nvshmem_src nvshmem \
    && rm nvshmem_src_3.2.5-1.txz

WORKDIR /sgl-workspace/nvshmem
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch

ENV CUDA_HOME=/usr/local/cuda
# The NVSHMEM_* feature switches are passed to the cmake configure step only.
# Parallelism is capped at the number of available CPUs; a bare `-j` spawns
# an unlimited number of jobs and can exhaust memory on the build host.
RUN NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES="${NVSHMEM_CUDA_ARCHITECTURES}" \
    && make -C build install -j"$(nproc)"
# Install DeepEP from the cloned sources against the NVSHMEM build above.
WORKDIR /sgl-workspace/DeepEP
# NVSHMEM_DIR is exported with ENV, so it is visible to the pip build below
# and to later layers and containers.
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir .
# Install the `just` command runner into /usr/local/bin.
# The pipeline runs under bash with `-o pipefail` so that a failed download
# fails the build. Without pipefail, a failed curl hands empty input to
# `bash -s`, which exits 0 and leaves the image silently without `just`.
RUN /bin/bash -euxo pipefail -c \
    "curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin"

# Set workspace
WORKDIR /sgl-workspace
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment