Dockerfile 13.1 KB
Newer Older
Yineng Zhang's avatar
Yineng Zhang committed
1
# CUDA toolkit release. Declared before FROM so it can select the base image tag.
# NOTE(review): later RUN steps read $CUDA_VERSION without redeclaring this ARG
# inside the stage; presumably the value comes from the base image environment —
# confirm.
ARG CUDA_VERSION=12.9.1

# Shared base stage: NVIDIA CUDA + cuDNN development image on Ubuntu 22.04.
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
3

4
# --- Build arguments (override with --build-arg) -----------------------------

# Target architecture of the build (declared here; no visible step reads it).
ARG TARGETARCH

# Extras group installed with the editable SGLang package: python[BUILD_TYPE].
ARG BUILD_TYPE=all
# "local" copies the build context as the SGLang checkout; anything else clones upstream.
ARG BRANCH_TYPE=remote

# Set to 1 to use the GB200 DeepEP fork and the wider NVSHMEM CUDA architecture list.
ARG GRACE_BLACKWELL=0
ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2

# Pinned source revisions.
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG TRITON_LANG_COMMIT=4caa0328bf8df64896dd5f6fb9df41b0eb2e750a

# Pinned component versions.
ARG SGL_KERNEL_VERSION=0.3.16.post4
ARG GDRCOPY_VERSION=2.5.1
ARG NVSHMEM_VERSION=3.4.5

17
18
# Environment shared by all later steps: non-interactive apt, the CUDA root,
# the GDRCopy driver source directory and the NVSHMEM install prefix.
ENV DEBIAN_FRONTEND=noninteractive
ENV CUDA_HOME=/usr/local/cuda
ENV GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
21
22
23
# Append the GKE default NVIDIA binary and library locations to the search paths.
ENV PATH="${PATH}:/usr/local/nvidia/bin"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
Ying Sheng's avatar
Ying Sheng committed
24

Yineng Zhang's avatar
Yineng Zhang committed
25
26
# Bootstrap Python: install Python 3.12 from the deadsnakes PPA, register both
# 3.10 and 3.12 as `python3` alternatives with 3.12 selected, then install pip
# through get-pip.py.
# `apt-get` is used instead of `apt` (which is meant for interactive use), and
# the downloaded installer plus the apt package lists are removed in this same
# layer so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y wget \
 && apt-get install -y software-properties-common \
 && add-apt-repository -y ppa:deadsnakes/ppa \
 && apt-get install -y python3.12-full python3.12-dev python3.10-venv \
 && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
 && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 \
 && update-alternatives --set python3 /usr/bin/python3.12 \
 && wget -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
 && python3 /tmp/get-pip.py \
 && rm -f /tmp/get-pip.py \
 && rm -rf /var/lib/apt/lists/*

34
# Preseed tzdata (America/Los_Angeles) so it installs without prompting, install
# the system packages, point /usr/bin/python at Python 3.12 and drop apt caches.
RUN printf '%s\n' \
      'tzdata tzdata/Areas select America' \
      'tzdata tzdata/Zones/America select Los_Angeles' \
    | debconf-set-selections \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
    tzdata \
# shell and general utilities
    software-properties-common \
    netcat-openbsd \
    kmod \
    unzip \
    openssh-server \
    curl \
    wget \
    lsof \
    zsh \
    ccache \
    tmux \
    htop \
    git-lfs \
    tree \
# compilers and build tools
    build-essential \
    cmake \
    perl \
# MPI / NUMA
    libopenmpi-dev \
    libnuma1 \
    libnuma-dev \
    numactl \
# RDMA / InfiniBand / netlink libraries
    libibverbs-dev \
    libibverbs1 \
    libibumad3 \
    librdmacm1 \
    libnl-3-200 \
    libnl-route-3-200 \
    libnl-route-3-dev \
    libnl-3-dev \
    ibverbs-providers \
    infiniband-diags \
    perftest \
# C/C++ development libraries
    libgoogle-glog-dev \
    libgtest-dev \
    libjsoncpp-dev \
    libunwind-dev \
    libboost-all-dev \
    libssl-dev \
# gRPC / protobuf
    libgrpc-dev \
    libgrpc++-dev \
    libprotobuf-dev \
    protobuf-compiler \
    protobuf-compiler-grpc \
# remaining libraries and packaging tools
    pybind11-dev \
    libhiredis-dev \
    libcurl4-openssl-dev \
    libczmq4 \
    libczmq-dev \
    libfabric-dev \
    patchelf \
    nvidia-dkms-550 \
    devscripts \
    debhelper \
    fakeroot \
    dkms \
    check \
    libsubunit0 \
    libsubunit-dev \
 && ln -sf /usr/bin/python3.12 /usr/bin/python \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

# GDRCopy: clone the v${GDRCOPY_VERSION} tag, build its Debian packages against
# /usr/local/cuda, install them with dpkg and remove the source tree afterwards.
RUN mkdir -p /tmp/gdrcopy \
 && git -C /tmp clone https://github.com/NVIDIA/gdrcopy.git -b v${GDRCOPY_VERSION} \
 && cd /tmp/gdrcopy/packages \
 && CUDA=/usr/local/cuda ./build-deb-packages.sh \
 && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
 && cd / \
 && rm -rf /tmp/gdrcopy
67

68
# DeepEP IBGDA fix: expose libmlx5.so.1 under the unversioned name libmlx5.so
# in the architecture-specific library directory.
RUN MULTIARCH_LIBDIR="/usr/lib/$(uname -m)-linux-gnu" \
 && ln -sf "${MULTIARCH_LIBDIR}/libmlx5.so.1" "${MULTIARCH_LIBDIR}/libmlx5.so"
70

71
72
73
74
75
# Helper stage that contains only the build context, placed under /src, so the
# main stage can pull the local checkout from it by stage name.
FROM scratch AS local_src
COPY . /src

# Main image stage, built on top of `base`.
FROM base AS build-image

# Install SGLang.
# Until torch 2.9 and cu13 are stable, torch is updated manually when building
# for CUDA 13.
# All sources are checked out and built under /sgl-workspace.
WORKDIR /sgl-workspace
78
79
80
81
82
83
84
85
# Populate /sgl-workspace/sglang:
#   BRANCH_TYPE=local  -> copy the build context provided by the local_src stage
#   anything else      -> shallow-clone the upstream repository
# The context is bind-mounted from local_src only for the duration of this step.
# A COPY followed by `rm -rf` would still keep the full copy in the COPY layer,
# so the mount keeps the context out of the image entirely.
ARG BRANCH_TYPE
RUN --mount=type=bind,from=local_src,source=/src,target=/tmp/local_src \
    if [ "$BRANCH_TYPE" = "local" ]; then \
        cp -r /tmp/local_src /sgl-workspace/sglang; \
    else \
        git clone --depth=1 https://github.com/sgl-project/sglang.git /sgl-workspace/sglang; \
    fi
86
87
88
89
90
# Install SGLang and its GPU dependencies for the selected CUDA version:
#   1. upgrade the packaging tools;
#   2. map CUDA_VERSION to the PyTorch wheel index suffix (CUINDEX), rejecting
#      unsupported versions;
#   3. install sgl-kernel (a release wheel for 12.6.1 and 13.0.1, the index
#      package for 12.8.1 / 12.9.1);
#   4. install SGLang in editable mode with the BUILD_TYPE extras;
#   5. pin NCCL 2.28.3 for the CUDA major version; on CUDA 13 also replace torch
#      with 2.9.0 from the matching index;
#   6. pre-download the flashinfer cubins.
# The CUDA 13 commands are chained with `&&` so that a failed NCCL install or
# torch uninstall stops the build instead of being masked by the next command.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
 && cd sglang \
 && case "$CUDA_VERSION" in \
      12.6.1) CUINDEX=126 ;; \
      12.8.1) CUINDEX=128 ;; \
      12.9.1) CUINDEX=129 ;; \
      13.0.1) CUINDEX=130 ;; \
      *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
 && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
      python3 -m pip install --no-cache-dir "https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl" --force-reinstall --no-deps ; \
    elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
      python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} ; \
    elif [ "$CUDA_VERSION" = "13.0.1" ]; then \
      python3 -m pip install --no-cache-dir "https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl" --force-reinstall --no-deps ; \
    else \
      echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ; \
    fi \
 && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
 && if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
      python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
      python3 -m pip install --no-cache-dir nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps \
      && python3 -m pip uninstall -y torch torchaudio torchvision \
      && python3 -m pip install --no-cache-dir torch==2.9.0 torchaudio==2.9.0 torchvision --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} ; \
    else \
      echo "No NCCL mapping for CUDA_VERSION=${CUDA_VERSION}" && exit 1 ; \
    fi \
 && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
119

120
# Download the NVSHMEM source tarball and check out DeepEP.
#
# NVSHMEM: CUDA 13 uses the "cuda-all-all" tarball from the GitHub release, any
# other CUDA major uses the "cuda12-all-all" tarball from the NVIDIA redist site.
#
# DeepEP: with GRACE_BLACKWELL=1 the fzyzcjy/DeepEP fork ("Tom's fork") is used
# for GB200 for now, checked out at ${GRACE_BLACKWELL_DEEPEP_BRANCH} (default
# gb200_blog_part_2, https://github.com/fzyzcjy/DeepEP/tree/gb200_blog_part_2;
# an earlier note identified it with commit
# 1fd57b0276311d035d16176bb0076426166e52f3 — unverified here). Otherwise
# upstream deepseek-ai/DeepEP is checked out at ${DEEPEP_COMMIT}. In both cases
# NUM_CPU_TIMEOUT_SECS is raised from 100 to 1000 in csrc/kernels/configs.cuh.
#
# Commands are separated with `;` under `set -e`. `set -e` is ignored for
# commands inside a non-final element of an `&&` list, so chaining the `if`
# blocks with `&&` would let a failed download go unnoticed until `tar` runs.
RUN set -eux; \
    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
      NVSHMEM_TARBALL="nvshmem_src_cuda-all-all-${NVSHMEM_VERSION}.tar.gz"; \
      NVSHMEM_URL="https://github.com/NVIDIA/nvshmem/releases/download/v${NVSHMEM_VERSION}-0/${NVSHMEM_TARBALL}"; \
    else \
      NVSHMEM_TARBALL="nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz"; \
      NVSHMEM_URL="https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/${NVSHMEM_TARBALL}"; \
    fi; \
    wget "${NVSHMEM_URL}"; \
    if [ "$GRACE_BLACKWELL" = "1" ]; then \
      DEEPEP_REPO="https://github.com/fzyzcjy/DeepEP.git"; \
      DEEPEP_REF="${GRACE_BLACKWELL_DEEPEP_BRANCH}"; \
    else \
      DEEPEP_REPO="https://github.com/deepseek-ai/DeepEP.git"; \
      DEEPEP_REF="${DEEPEP_COMMIT}"; \
    fi; \
    git clone "${DEEPEP_REPO}" DeepEP; \
    git -C DeepEP checkout "${DEEPEP_REF}"; \
    sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' DeepEP/csrc/kernels/configs.cuh; \
    tar -xf "${NVSHMEM_TARBALL}"; \
    mv nvshmem_src nvshmem; \
    rm -f "/sgl-workspace/${NVSHMEM_TARBALL}"
146
147
148

# Build and install NVSHMEM into ${NVSHMEM_DIR}.
# CUDA architectures: 90;100;103;120 when GRACE_BLACKWELL=1, otherwise 90.
# The NVSHMEM_* variables are passed in the environment of the configure step.
# The build parallelism comes from CMAKE_BUILD_PARALLEL_LEVEL when it is set and
# falls back to the number of available CPUs. This file never declares that
# variable, and an empty value would turn the option into a bare `-j` with no
# job count.
RUN cd /sgl-workspace/nvshmem && \
    if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;103;120"; else CUDA_ARCH="90"; fi && \
    NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX="${NVSHMEM_DIR}" -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" && \
    cmake --build build --target install --parallel "${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}"

# Install DeepEP against the NVSHMEM build in ${NVSHMEM_DIR}.
# The torch CUDA architecture list depends on the CUDA version (9.0 for 12.6.1;
# 9.0, 10.0 and 10.3 for 12.8.1 / 12.9.1 / 13.0.1).
# CTK13 requires the cccl include directory, so for CUDA 13 setup.py is patched
# to append ${CUDA_HOME}/include/cccl to include_dirs.
RUN cd /sgl-workspace/DeepEP && \
    case "$CUDA_VERSION" in \
      12.6.1) DEEPEP_ARCH_LIST='9.0' ;; \
      12.8.1|12.9.1|13.0.1) DEEPEP_ARCH_LIST='9.0;10.0;10.3' ;; \
      *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac && \
    { [ "${CUDA_VERSION%%.*}" != "13" ] || \
      sed -i "/^    include_dirs = \['csrc\/'\]/a\    include_dirs.append('${CUDA_HOME}/include/cccl')" setup.py; } && \
    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="${DEEPEP_ARCH_LIST}" pip install --no-build-isolation .
Ying Sheng's avatar
Ying Sheng committed
179

180
181
182
183
184
185
186
187
188
189
190
191
192
193
194

# To use flashinfer_cutedsl without IMA for WideEP configs we must install the
# latest flashinfer_cutedsl. Remove this step once 0.4.3 is officially released.
# The step pins the pre-release nvidia-cutlass-dsl 4.3.0.dev0 (hence --pre).
RUN python3 -m pip install --no-cache-dir --upgrade --pre "nvidia-cutlass-dsl==4.3.0.dev0"

# For CUDA 13 we build Triton from source to fix some sm103 issues.
# This can be reverted after a release newer than 3.4.5 is available.
# See the conversation in: https://github.com/triton-lang/triton/pull/8536
# The checkout stays in ./triton because the package is installed in editable
# mode. pip runs with --no-cache-dir, like the other pip steps in this file, so
# its download cache is not stored in the layer.
RUN if [ "$CUDA_VERSION" = "13.0.1" ]; then \
      git clone https://github.com/triton-lang/triton.git && \
      cd triton && \
      git checkout "${TRITON_LANG_COMMIT}" && \
      pip install --no-cache-dir --break-system-packages -r python/requirements.txt && \
      MAX_JOBS=20 pip install --no-cache-dir --break-system-packages -e . ; \
    fi
195

196
197
198
# Python tooling installed into the image (only mooncake-transfer-engine is pinned).
RUN python3 -m pip install --no-cache-dir \
    datamodel_code_generator mooncake-transfer-engine==0.3.6.post1 \
    pre-commit pytest black isort icdiff \
    uv wheel scikit-build-core \
    nixl py-spy
Yineng Zhang's avatar
Yineng Zhang committed
210

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# Development tools and utilities, followed by the RDMA/InfiniBand runtime
# packages; apt caches are dropped in the same layer.
RUN apt-get update && apt-get install -y \
    gdb ninja-build vim tmux htop \
    wget curl locales lsof \
    git git-lfs zsh tree \
    silversearcher-ag cloc unzip pkg-config \
    libssl-dev bear ccache less \
    && apt install -y \
       rdma-core infiniband-diags openssh-server perftest ibverbs-providers \
       libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the Nsight Systems CLI from NVIDIA's devtools apt repository. The
# repository path uses arm64/amd64 and the key path uses arm64/x86_64, chosen
# from `uname -m`.
# NOTE(review): the repository is the ubuntu2004 one and the key is fetched from
# the ubuntu1804 CUDA path, although the base image is Ubuntu 22.04 — confirm
# this is intentional.
# apt-get is used instead of apt, and the package lists are removed in the same
# layer, matching the other apt steps in this file.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends gnupg \
    && echo "deb http://developer.download.nvidia.com/devtools/repos/ubuntu2004/$(if [ "$(uname -m)" = "aarch64" ]; then echo "arm64"; else echo "amd64"; fi) /" | tee /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/$(if [ "$(uname -m)" = "aarch64" ]; then echo "arm64"; else echo "x86_64"; fi)/7fa2af80.pub \
    && apt-get update -y \
    && apt-get install -y nsight-systems-cli \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Generate the en_US.UTF-8 locale and select it through the locale variables.
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8
250
251
252
253
254
255
256

# Minimal Python packages for development and analysis (unpinned).
RUN python3 -m pip install --no-cache-dir --break-system-packages \
    pytest black isort icdiff \
    scikit-build-core uv pre-commit \
    pandas matplotlib tabulate
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

# Install diff-so-fancy v1.4.4 as a single executable.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# "binary", which chmod would then happily mark executable.
RUN curl -fLSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
    && chmod +x /usr/local/bin/diff-so-fancy

# Install a static clang-format 16 binary.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# "binary".
# NOTE(review): the asset name is hard-coded to linux-amd64, while other steps
# in this file branch on `uname -m` for aarch64 — confirm what should be
# installed on arm64 builds.
RUN curl -fLSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
    && chmod +x /usr/local/bin/clang-format

# Install clangd: download the release archive, copy its bin/ and lib/ trees
# under /usr/local, then delete the archive and the unpacked directory.
RUN CLANGD_VERSION=18.1.3 \
    && curl -L "https://github.com/clangd/clangd/releases/download/${CLANGD_VERSION}/clangd-linux-${CLANGD_VERSION}.zip" -o clangd.zip \
    && unzip clangd.zip \
    && cp -r "clangd_${CLANGD_VERSION}/bin/"* /usr/local/bin/ \
    && cp -r "clangd_${CLANGD_VERSION}/lib/"* /usr/local/lib/ \
    && rm -rf "clangd_${CLANGD_VERSION}" clangd.zip

# Install CMake 3.31.1 from the Kitware release tarball for the current
# architecture: copy bin/ and share/ under /usr/local, then remove the download.
RUN CMAKE_VERSION=3.31.1 \
    && CMAKE_PKG="cmake-${CMAKE_VERSION}-linux-$(uname -m)" \
    && wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_PKG}.tar.gz" \
    && tar -xzf "${CMAKE_PKG}.tar.gz" \
    && cp -r "${CMAKE_PKG}/bin/"* /usr/local/bin/ \
    && cp -r "${CMAKE_PKG}/share/"* /usr/local/share/ \
    && rm -rf "${CMAKE_PKG}" "${CMAKE_PKG}.tar.gz"
288

289
# Build and install sgl-router. The Rust toolchain is installed with rustup,
# used by maturin to build the wheel, and removed again at the end of this same
# step (together with the build outputs) to save space.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && export PATH="/root/.cargo/bin:${PATH}" \
    && rustc --version \
    && cargo --version \
    && python3 -m pip install --no-cache-dir maturin \
    && cd /sgl-workspace/sglang/sgl-router \
    && ulimit -n 65536 \
    && maturin build --release --features vendored-openssl --out dist \
    && python3 -m pip install --no-cache-dir --force-reinstall dist/*.whl \
    && rm -rf /root/.cache /root/.cargo /root/.rustup target dist ~/.cargo
298
299


300
# Add yank script
301
# Copied root-owned and executable (mode 755) in one step, so no follow-up
# chown/chmod layer is needed.
COPY --chown=root:root --chmod=755 docker/configs/yank /usr/local/bin/yank
302
303
304
305
306
307

# Install oh-my-zsh and plugins.
# The upstream installer runs in unattended mode; the zsh-autosuggestions and
# zsh-syntax-highlighting plugins are then cloned into the custom plugins
# directory ($ZSH_CUSTOM if set, otherwise ~/.oh-my-zsh/custom).
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
    && git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
    && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting

308
309
310
# Configure Vim and tmux: install the repository's dotfiles into root's home.
COPY docker/configs/.vimrc /root/.vimrc
COPY docker/configs/.tmux.conf /root/.tmux.conf
311
312

# Configure Git
313
314
# The repository's git settings are appended to /root/.gitconfig rather than
# overwriting it; the temporary copy is removed afterwards.
COPY docker/configs/.gitconfig /tmp/.gitconfig
RUN cat /tmp/.gitconfig >> /root/.gitconfig && rm /tmp/.gitconfig
315
316

# Configure zsh
317
# Replaces /root/.zshrc with the repository's version.
COPY docker/configs/.zshrc /root/.zshrc
318
319
320
321
322
323

# Install the `just` command runner into /usr/local/bin.
# The installer is downloaded to a file first and then executed. With
# `curl ... | bash`, the step's status is bash's alone, so a failed download
# would feed bash an empty script and the step would "succeed" without
# installing anything. The previous `set -euxo ;` also passed -o without an
# option name, so no pipefail was ever enabled.
RUN set -eux; \
    curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh -o /tmp/just-install.sh; \
    bash /tmp/just-install.sh --to /usr/local/bin; \
    rm -f /tmp/just-install.sh

# Set workspace directory: the SGLang checkout is the working directory for any
# instruction that follows.
WORKDIR /sgl-workspace/sglang