# Dockerfile for the SGLang CUDA image: a `base` toolchain stage and a `build-image` stage.
# CUDA toolkit version; it selects the tag of the nvidia/cuda base image below.
ARG CUDA_VERSION=12.9.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base

# Build arguments declared in the base stage.
ARG TARGETARCH
# Extras group passed to `pip install -e "python[...]"` when installing SGLang.
ARG BUILD_TYPE=all
# "local" copies the build context as the SGLang source; any other value clones from GitHub.
ARG BRANCH_TYPE=remote
# Set to 1 to use the Grace Blackwell DeepEP fork and the wider NVSHMEM CUDA arch list.
ARG GRACE_BLACKWELL=0

# Pinned source revisions and versions.
ARG GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG TRITON_LANG_COMMIT=4caa0328bf8df64896dd5f6fb9df41b0eb2e750a
# Parallelism used for builds (-j / MAX_JOBS) and for the flashinfer cubin download.
ARG BUILD_AND_DOWNLOAD_PARALLEL=8
ARG SGL_KERNEL_VERSION=0.3.16.post5
ARG SGL_VERSION=0.5.4.post3
# Set to 1 to clone the default branch of SGLang instead of the v${SGL_VERSION} tag.
ARG USE_LATEST_SGLANG=0
ARG GDRCOPY_VERSION=2.5.1
ARG NVSHMEM_VERSION=3.4.5

# Optional mirrors; empty values leave the defaults in place.
ARG PIP_DEFAULT_INDEX
ARG UBUNTU_MIRROR
# Host substituted into most GitHub download URLs in this file.
ARG GITHUB_ARTIFACTORY=github.com

# Locations used by the GDRCopy / NVSHMEM / DeepEP build steps further down.
ENV DEBIAN_FRONTEND=noninteractive \
    CUDA_HOME=/usr/local/cuda \
    GDRCOPY_HOME=/usr/src/gdrdrv-${GDRCOPY_VERSION}/ \
    NVSHMEM_DIR=/sgl-workspace/nvshmem/install

# Add GKE default lib and bin locations (appended after the existing entries).
ENV PATH="${PATH}:/usr/local/nvidia/bin" \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"

# Point the archive and security apt sources at UBUNTU_MIRROR when it is set.
RUN if [ -n "$UBUNTU_MIRROR" ]; then \
    sed -i \
        -e "s|http://.*archive.ubuntu.com|$UBUNTU_MIRROR|g" \
        -e "s|http://.*security.ubuntu.com|$UBUNTU_MIRROR|g" \
        /etc/apt/sources.list; \
fi

# Install Python 3.12 from the deadsnakes PPA, make it the default python3, and bootstrap pip.
# The get-pip.py bootstrap script is removed in the same layer so it does not stay in the image.
RUN --mount=type=cache,target=/var/cache/apt apt-get update && apt-get install wget -y && apt-get install software-properties-common -y \
 && add-apt-repository ppa:deadsnakes/ppa -y \
 && apt-get install python3.12-full python3.12-dev python3.10-venv -y \
 && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
 && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 \
 && update-alternatives --set python3 /usr/bin/python3.12 \
 && wget https://bootstrap.pypa.io/get-pip.py \
 && python3 get-pip.py \
 && rm -f get-pip.py

# Preseed tzdata (America/Los_Angeles) so its install is non-interactive, then install
# the system packages: general tools, build toolchain, MPI/NUMA, RDMA/InfiniBand userspace,
# C++ libraries, and the Debian packaging tools used by the GDRCopy build.
RUN --mount=type=cache,target=/var/cache/apt echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
 && apt-get update && apt-get install -y --no-install-recommends \
    tzdata \
    software-properties-common netcat-openbsd kmod unzip openssh-server \
    curl wget lsof zsh ccache tmux htop git-lfs tree \
    build-essential cmake perl \
    libopenmpi-dev libnuma1 libnuma-dev numactl \
    libibverbs-dev libibverbs1 libibumad3 \
    librdmacm1 libnl-3-200 libnl-route-3-200 libnl-route-3-dev libnl-3-dev \
    ibverbs-providers infiniband-diags perftest \
    libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \
    libboost-all-dev libssl-dev \
    libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler protobuf-compiler-grpc \
    pybind11-dev \
    libhiredis-dev libcurl4-openssl-dev \
    libczmq4 libczmq-dev \
    libfabric-dev \
    patchelf \
    nvidia-dkms-550 \
    devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev \
 && ln -sf /usr/bin/python3.12 /usr/bin/python \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

# Replace pip's global index URL if PIP_DEFAULT_INDEX is specified.
# The value is quoted so it is passed to pip as a single argument.
RUN if [ -n "${PIP_DEFAULT_INDEX}" ]; then \
    python3 -m pip config set global.index-url "${PIP_DEFAULT_INDEX}"; \
fi

# GDRCopy installation: download the tagged source archive, build its Debian packages
# and install them. The archive extracts to /tmp/gdrcopy-${GDRCOPY_VERSION}, so that
# directory is removed too; otherwise the whole build tree would stay in this layer.
RUN mkdir -p /tmp/gdrcopy && cd /tmp \
 && wget -q https://${GITHUB_ARTIFACTORY}/NVIDIA/gdrcopy/archive/refs/tags/v${GDRCOPY_VERSION}.tar.gz \
 && tar -xzf v${GDRCOPY_VERSION}.tar.gz && rm v${GDRCOPY_VERSION}.tar.gz \
 && cd gdrcopy-${GDRCOPY_VERSION}/packages \
 && CUDA=/usr/local/cuda ./build-deb-packages.sh \
 && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
 && cd / && rm -rf /tmp/gdrcopy /tmp/gdrcopy-${GDRCOPY_VERSION}

# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name libmlx5.so
# in the architecture-specific library directory.
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so

# Stage that holds the build context so it can be offered to build-image as the
# SGLang source when BRANCH_TYPE=local.
FROM scratch AS local_src
COPY . /src

FROM base AS build-image
# Install SGLang
# Until torch 2.9 and cu13 are stable we manually update torch if you are on CUDA 13
WORKDIR /sgl-workspace
ARG BRANCH_TYPE
# Obtain the SGLang source tree:
#   BRANCH_TYPE=local     -> copy the build context
#   USE_LATEST_SGLANG=1   -> shallow clone of the default branch
#   otherwise             -> shallow clone of tag v${SGL_VERSION}
# The context is bind-mounted for this RUN only. A COPY followed by a later `rm`
# would keep the full context in an image layer even when it is not used.
RUN --mount=type=bind,from=local_src,source=/src,target=/tmp/local_src \
    if [ "$BRANCH_TYPE" = "local" ]; then \
        cp -r /tmp/local_src /sgl-workspace/sglang; \
    elif [ "$USE_LATEST_SGLANG" = "1" ]; then \
        git clone --depth=1 https://github.com/sgl-project/sglang.git /sgl-workspace/sglang; \
    else \
        git clone --depth=1 --branch v${SGL_VERSION} https://github.com/sgl-project/sglang.git /sgl-workspace/sglang; \
    fi
# Install sgl-kernel, SGLang (editable, with the BUILD_TYPE extras), a pinned NCCL wheel,
# and the flashinfer cubins. CUINDEX selects the matching PyTorch wheel index.
# NOTE(review): CUDA_VERSION is not redeclared as an ARG after FROM; it is presumably
# supplied by the base image environment. Confirm before changing the base image.
# Both direct wheel downloads go through GITHUB_ARTIFACTORY, and the CUDA 13 steps are
# chained with && so that a failed step fails the build.
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade pip setuptools wheel html5lib six \
 && cd sglang \
 && case "$CUDA_VERSION" in \
      12.6.1) CUINDEX=126 ;; \
      12.8.1) CUINDEX=128 ;; \
      12.9.1) CUINDEX=129 ;; \
      13.0.1) CUINDEX=130 ;; \
      *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
 && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
      python3 -m pip install https://${GITHUB_ARTIFACTORY}/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
   ; \
   elif [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
      python3 -m pip install sgl-kernel==${SGL_KERNEL_VERSION} \
   ; \
   elif [ "$CUDA_VERSION" = "13.0.1" ]; then \
      python3 -m pip install https://${GITHUB_ARTIFACTORY}/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps \
   ; \
   else \
      echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 \
   ; \
   fi \
 && python3 -m pip install -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
 && if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
      python3 -m pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
      python3 -m pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps && \
      python3 -m pip uninstall -y torch torchaudio torchvision && \
      python3 -m pip install torch==2.9.0 torchaudio==2.9.0 torchvision --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} ; \
    else \
      echo "No NCCL mapping for CUDA_VERSION=${CUDA_VERSION}" && exit 1 ; \
    fi \
 && FLASHINFER_CUBIN_DOWNLOAD_THREADS=${BUILD_AND_DOWNLOAD_PARALLEL} FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin

# Download NVSHMEM and DeepEP source files into the working directory.
# NVSHMEM: CUDA 13 uses the GitHub release tarball; other versions use the cuda12 redist tarball.
# DeepEP: with GRACE_BLACKWELL=1 we use Tom's DeepEP fork for GB200 for now
# (https://github.com/fzyzcjy/DeepEP, branch ${GRACE_BLACKWELL_DEEPEP_BRANCH});
# otherwise the deepseek-ai/DeepEP archive at ${DEEPEP_COMMIT}.
# In both cases NUM_CPU_TIMEOUT_SECS in csrc/kernels/configs.cuh is raised from 100 to 1000.
RUN set -eux; \
    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
      wget -q https://${GITHUB_ARTIFACTORY}/NVIDIA/nvshmem/releases/download/v${NVSHMEM_VERSION}-0/nvshmem_src_cuda-all-all-${NVSHMEM_VERSION}.tar.gz; \
      NVSHMEM_TARBALL="nvshmem_src_cuda-all-all-${NVSHMEM_VERSION}.tar.gz"; \
    else \
      wget -q https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz; \
      NVSHMEM_TARBALL="nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz"; \
    fi && \
    if [ "$GRACE_BLACKWELL" = "1" ]; then \
      git clone https://github.com/fzyzcjy/DeepEP.git && \
      cd DeepEP && \
      git checkout ${GRACE_BLACKWELL_DEEPEP_BRANCH} && \
      sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && \
      cd .. ; \
    else \
      wget -q https://${GITHUB_ARTIFACTORY}/deepseek-ai/DeepEP/archive/${DEEPEP_COMMIT}.zip && \
      unzip ${DEEPEP_COMMIT}.zip && rm ${DEEPEP_COMMIT}.zip && mv DeepEP-${DEEPEP_COMMIT} DeepEP && cd DeepEP && \
      sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && \
      cd .. ; \
    fi && \
    tar -xf "${NVSHMEM_TARBALL}" && \
    mv nvshmem_src nvshmem && \
    rm -f "/sgl-workspace/${NVSHMEM_TARBALL}"

# Build and install NVSHMEM into ${NVSHMEM_DIR}.
# The NVSHMEM_* assignments are an environment prefix for the cmake configure command:
# IBGDA and GDRCopy are enabled; SHMEM, UCX, NCCL, MPI and PMIx support are disabled.
# GRACE_BLACKWELL=1 widens the CUDA architecture list beyond 90.
RUN cd /sgl-workspace/nvshmem && \
    if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;103;120"; else CUDA_ARCH="90"; fi && \
    NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} && \
    cmake --build build --target install -j${BUILD_AND_DOWNLOAD_PARALLEL}

# Install DeepEP
# The torch CUDA arch list depends on the CUDA version (9.0 only for 12.6.1).
# CTK13 requires the cccl include, so for CUDA 13 setup.py is patched to append
# ${CUDA_HOME}/include/cccl to include_dirs before building.
RUN --mount=type=cache,target=/root/.cache/pip cd /sgl-workspace/DeepEP && \
    case "$CUDA_VERSION" in \
      12.6.1) \
        CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' \
        ;; \
      12.8.1|12.9.1|13.0.1) \
        CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0;10.3' \
        ;; \
      *) \
        echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 \
        ;; \
    esac && \
    if [ "${CUDA_VERSION%%.*}" = "13" ]; then \
      sed -i "/^    include_dirs = \['csrc\/'\]/a\    include_dirs.append('${CUDA_HOME}/include/cccl')" setup.py; \
    fi && \
    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} pip install --no-build-isolation .

# In order to use flashinfer_cutedsl without IMA for WideEP configs we must install
# the latest flashinfer_cutedsl. Once 0.4.3 is officially released, remove this.
# This step installs the nvidia-cutlass-dsl 4.3.0.dev0 pre-release.
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade --pre "nvidia-cutlass-dsl==4.3.0.dev0" --extra-index-url https://pypi.org/simple/

# For cuda 13, we install triton from source to fix some sm103 issues
# This can be reverted after >3.4.5 is released
# See the conversation in: https://github.com/triton-lang/triton/pull/8536
# The source is the archive of commit ${TRITON_LANG_COMMIT}; other CUDA versions skip this step.
RUN --mount=type=cache,target=/root/.cache/pip if [ "$CUDA_VERSION" = "13.0.1" ]; then \
    wget -q https://${GITHUB_ARTIFACTORY}/triton-lang/triton/archive/${TRITON_LANG_COMMIT}.zip && \
    unzip -q ${TRITON_LANG_COMMIT}.zip && rm ${TRITON_LANG_COMMIT}.zip && mv triton-${TRITON_LANG_COMMIT} triton && \
    cd triton && pip install --break-system-packages -r python/requirements.txt && \
    MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} pip install --break-system-packages -e .; \
fi

# Python tools (only mooncake-transfer-engine is version-pinned)
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install \
    datamodel_code_generator \
    mooncake-transfer-engine==0.3.7.post2 \
    pre-commit \
    pytest \
    black \
    isort \
    icdiff \
    uv \
    wheel \
    scikit-build-core \
    nixl \
    py-spy

# Install development tools and utilities, followed by the RDMA / InfiniBand userspace packages.
RUN --mount=type=cache,target=/var/cache/apt apt-get update && apt-get install -y \
    gdb \
    ninja-build \
    vim \
    tmux \
    htop \
    wget \
    curl \
    locales \
    lsof \
    git \
    git-lfs \
    zsh \
    tree \
    silversearcher-ag \
    cloc \
    unzip \
    pkg-config \
    libssl-dev \
    bear \
    ccache \
    less \
    && apt install -y rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the Nsight Systems CLI from NVIDIA's devtools apt repository.
# The repository path segment is arm64 on aarch64 and amd64 otherwise; the signing key
# URL uses arm64 / x86_64 respectively.
RUN --mount=type=cache,target=/var/cache/apt apt update -y \
    && apt install -y --no-install-recommends gnupg \
    && echo "deb http://developer.download.nvidia.com/devtools/repos/ubuntu2004/$(if [ "$(uname -m)" = "aarch64" ]; then echo "arm64"; else echo "amd64"; fi) /" | tee /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/$(if [ "$(uname -m)" = "aarch64" ]; then echo "arm64"; else echo "x86_64"; fi)/7fa2af80.pub \
    && apt update -y \
    && apt install nsight-systems-cli -y

# Set up locale: generate en_US.UTF-8 and make it the default for the image.
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8

# Install minimal Python packages (test, formatting, and plotting/tabulation helpers)
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --break-system-packages \
    pytest \
    black \
    isort \
    icdiff \
    scikit-build-core \
    uv \
    pre-commit \
    pandas \
    matplotlib \
    tabulate

# Install diff-so-fancy (v1.4.4 release script) into /usr/local/bin
RUN curl -LSso /usr/local/bin/diff-so-fancy https://${GITHUB_ARTIFACTORY}/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
    && chmod +x /usr/local/bin/diff-so-fancy

# Install clang-format (static clang-format-16 binary) into /usr/local/bin
# NOTE(review): the downloaded asset is the linux-amd64 build regardless of the image
# architecture; confirm whether an aarch64 image needs a different binary.
RUN curl -LSso /usr/local/bin/clang-format https://${GITHUB_ARTIFACTORY}/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
    && chmod +x /usr/local/bin/clang-format

# Install clangd 18.1.3: unpack the release zip, copy bin/ and lib/ under /usr/local,
# then remove the archive and the extracted tree in the same layer.
RUN curl -L https://${GITHUB_ARTIFACTORY}/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
    && unzip clangd.zip \
    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
    && rm -rf clangd_18.1.3 clangd.zip

# Install CMake 3.31.1 from the Kitware release tarball for the current architecture
# (`uname -m`), copying bin/ and share/ under /usr/local and cleaning up in the same layer.
RUN CMAKE_VERSION=3.31.1 \
    && ARCH=$(uname -m) \
    && CMAKE_INSTALLER="cmake-${CMAKE_VERSION}-linux-${ARCH}" \
    && wget -q "https://${GITHUB_ARTIFACTORY}/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_INSTALLER}.tar.gz" \
    && tar -xzf "${CMAKE_INSTALLER}.tar.gz" \
    && cp -r "${CMAKE_INSTALLER}/bin/"* /usr/local/bin/ \
    && cp -r "${CMAKE_INSTALLER}/share/"* /usr/local/share/ \
    && rm -rf "${CMAKE_INSTALLER}" "${CMAKE_INSTALLER}.tar.gz"

# Build and install sgl-router (Rust toolchain removed after build to save space)
# rustup installs the toolchain, maturin builds a release wheel with vendored OpenSSL,
# the wheel is force-reinstalled, and the toolchain plus build outputs are deleted.
RUN --mount=type=cache,target=/root/.cache/pip curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && export PATH="/root/.cargo/bin:${PATH}" \
    && rustc --version && cargo --version \
    && python3 -m pip install maturin \
    && cd /sgl-workspace/sglang/sgl-router \
    && ulimit -n 65536 && maturin build --release --features vendored-openssl --out dist \
    && python3 -m pip install --force-reinstall dist/*.whl \
    && rm -rf /root/.cargo /root/.rustup target dist ~/.cargo

# Add yank script (root-owned, mode 755)
COPY --chown=root:root --chmod=755 docker/configs/yank /usr/local/bin/yank

# Install oh-my-zsh (unattended) plus the zsh-autosuggestions and
# zsh-syntax-highlighting plugins under the oh-my-zsh custom plugin directory.
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
    && git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
    && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting

# Configure Vim and tmux
COPY docker/configs/.vimrc /root/.vimrc
COPY docker/configs/.tmux.conf /root/.tmux.conf

# Configure Git: append the provided settings to root's gitconfig instead of replacing it
COPY docker/configs/.gitconfig /tmp/.gitconfig
RUN cat /tmp/.gitconfig >> /root/.gitconfig && rm /tmp/.gitconfig

# Configure zsh
COPY docker/configs/.zshrc /root/.zshrc

# Install `just` 1.42.4 into /usr/local/bin using the upstream install script, with its
# github.com URLs rewritten to GITHUB_ARTIFACTORY.
# The script is saved to a file instead of being piped: the default /bin/sh has no
# pipefail, so a failed download in `curl | sed | bash` would go unnoticed. With
# `set -eux` each step now aborts the build on failure. The previous `set -euxo`
# passed -o without an option name.
RUN set -eux; \
    curl --proto '=https' --tlsv1.2 -sSf -o /tmp/just-install.sh https://just.systems/install.sh; \
    sed -i "s|https://github.com|https://${GITHUB_ARTIFACTORY}|g" /tmp/just-install.sh; \
    bash /tmp/just-install.sh --tag 1.42.4 --to /usr/local/bin; \
    rm -f /tmp/just-install.sh

# Set workspace directory: later instructions and the container's default working
# directory use the SGLang checkout.
WORKDIR /sgl-workspace/sglang