# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Global build arguments. ARGs declared before the first FROM are visible to
# FROM instructions only; a stage that needs one must redeclare it.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="059d4cd"
ARG TORCH_BACKEND="cu128"

# After this commit deepgemm API changed
# 1.0.0 -> 2.0.0
ARG DEEPGEMM_REF="03d0be3"
ARG FLASHINF_REF="1d72ed4"

# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_VERSION="0.9.2"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND, VLLM_VERSION so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG TORCH_BACKEND
ARG VLLM_VERSION

USER root
ARG PYTHON_VERSION=3.12

# System packages for the base stage. The package index is refreshed and
# removed within the same layer so it does not persist in the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    build-essential \
    protobuf-compiler \
    libssl-dev \
    pkg-config \
    # Install utilities
    nvtop \
    tmux \
    vim \
    autoconf \
    automake \
    libtool \
    net-tools \
    # wget is called by later RUN steps in this stage (NATS, etcd, rustup);
    # install it explicitly instead of relying on the base image to ship it
    wget \
    # These headers are missing with the hpcx installer, required
    # by UCX to find RDMA devices
    libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
    libnuma-dev librdmacm-dev ibverbs-providers \
    # For Prometheus
    curl tar ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Git refs used for the UCX and NIXL source builds below.
ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4

# NIXL source checkout and install locations; the library directory is
# suffixed with the ARCH_ALT multiarch triplet.
ENV NIXL_SRC_DIR=/opt/nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ARG ARCH_ALT
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH

WORKDIR /workspace

### UCX EFA Setup ###
# Remove any UCX trees under /opt/hpcx/ucx and /usr/local/ucx, then build UCX
# at $NIXL_UCX_REF from source and install it into /usr/local/ucx.
RUN rm -rf /opt/hpcx/ucx && \
    rm -rf /usr/local/ucx && \
    echo "Building UCX with reference $NIXL_UCX_REF" && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    ./autogen.sh && ./configure \
    --prefix=/usr/local/ucx \
    --enable-shared \
    --disable-static \
    --disable-doxygen-doc \
    --enable-optimizations \
    --enable-cma \
    --enable-devel-headers \
    --with-cuda=/usr/local/cuda \
    --with-verbs \
    --with-efa \
    --with-dm \
    --with-gdrcopy=/usr/local \
    --enable-mt && \
    # Bound parallelism to the CPU count; a bare `make -j` places no limit on
    # the number of concurrent jobs and can exhaust memory on the build host.
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig

ENV LD_LIBRARY_PATH=\
/usr/lib:/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV CPATH=/usr/include
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
# All following RUN instructions in this stage execute under bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

### NIXL SETUP ###
# Clone nixl source at ${NIXL_REF}, then configure, build and install it with
# meson/ninja into $NIXL_PREFIX.
# TEMP: disable gds backend for arm64
RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
    cd ${NIXL_SRC_DIR} && \
    git checkout ${NIXL_REF} && \
    if [ "$ARCH" = "arm64" ]; then \
        nixl_build_args="-Ddisable_gds_backend=true"; \
    else \
        nixl_build_args=""; \
    fi && \
    mkdir build && \
    meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
    cd build/ && \
    ninja && \
    ninja install

### NATS & ETCD SETUP ###
# NATS release tag; kept in one place because it appears in both the download
# URL and the .deb file name.
ARG NATS_VERSION="v2.10.28"
ENV ETCD_VERSION="v3.5.21"
# Install nats-server from its release .deb and unpack the etcd release tarball
# into /usr/local/bin/etcd. Downloaded archives are removed in the same layer.
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb && \
    wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
ENV VIRTUAL_ENV=/opt/dynamo/venv
# NOTE(review): the uv image is referenced by the floating `latest` tag, so the
# uv version can change between builds; consider pinning a version or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Use the PYTHON_VERSION build arg declared at the top of this stage
# (default 3.12) instead of a second hard-coded copy of the version.
RUN mkdir /opt/dynamo && \
    uv venv ${VIRTUAL_ENV} --python ${PYTHON_VERSION}

# Activate virtual environment
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# Builds the wheel from ${NIXL_SRC_DIR} into /workspace/wheels/nixl and
# installs it into the active virtualenv.
# TODO: Move gds_path selection based on arch into NIXL build
# TODO: Move NIXL wheel install to the wheel_builder stage
# TEMP: disable gds backend for arm64
RUN cd ${NIXL_SRC_DIR} && \
    if [ "$ARCH" = "arm64" ]; then \
        uv build . --out-dir /workspace/wheels/nixl \
        --config-settings=setup-args="-Ddisable_gds_backend=true"; \
    else \
        uv build . --out-dir /workspace/wheels/nixl; \
    fi && \
    # Install the wheel
    uv pip install /workspace/wheels/nixl/*.whl

# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF
ARG DEEPGEMM_REF
ARG FLASHINF_REF

ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda

# arm64 runs the install_vllm.sh script from container/deps with
# --installation-dir /opt; every other architecture installs the
# vllm==${VLLM_VERSION} wheel with uv.
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    if [ "$ARCH" = "arm64" ]; then \
        # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
        # Should be able to select how you want your build to go
        cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
        chmod +x /tmp/install_vllm.sh && \
        /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND; \
    else \
        uv pip install "vllm==${VLLM_VERSION}"; \
    fi

ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$LD_LIBRARY_PATH

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Install test dependencies, run pyright once with its output discarded, and
# write a git safe.directory entry for /workspace into root's gitconfig.
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt && \
    pyright --help > /dev/null 2>&1 && \
    printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Install prometheus
# Downloads the release tarball matching the dpkg architecture and installs
# only the `prometheus` binary into /usr/local/bin.
ARG PROM_VERSION=3.4.1
# pipefail makes a failed download fail the step, rather than relying on tar
# to error out on truncated input.
RUN set -o pipefail && \
    ARCH=$(dpkg --print-architecture) && \
    case "$ARCH" in \
        amd64) PLATFORM=linux-amd64 ;; \
        arm64) PLATFORM=linux-arm64 ;; \
        *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
    esac && \
    curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \
    | tar -xz -C /tmp && \
    mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \
    chmod +x /usr/local/bin/prometheus && \
    rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM}

### BUILDS ###

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.87.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container

# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

# so we can use maturin develop
# Quoted so the shell cannot treat "[patchelf]" as a glob character class.
RUN uv pip install "maturin[patchelf]"

USER $USERNAME
ENV HOME=/home/$USERNAME
# Any pre-existing PYTHONPATH is kept between the two project source
# directories; it is appended once rather than twice.
ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src:$PYTHONPATH:$HOME/dynamo/components/planner/src
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# ARCH_ALT can be interpolated in the FROM below because it is declared as a
# global ARG before the first FROM of this file. The redeclaration here sits in
# the scope of the preceding stage and is kept only for readability.
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl

WORKDIR /workspace

# Build-time packages; the yum cache is cleaned in the same layer.
RUN yum update -y \
    && yum install -y llvm-toolset \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

# Reuse the Rust toolchain, NIXL install, workspace and virtualenv that were
# prepared in the base stage.
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk

# Build the Rust workspace in release mode against the committed Cargo.lock.
RUN cargo build \
    --release \
    --locked \
    --features dynamo-llm/block-manager \
    --workspace

# Build dynamo wheel
# The pure-Python wheel is built from /workspace; the bindings wheel is built
# with maturin from lib/bindings/python. Both land in /workspace/dist.
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    # Quoted so the shell cannot treat "[patchelf]" as a glob character class.
    uv pip install "maturin[patchelf]" && \
    maturin build --release --features block-manager --out /workspace/dist && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        # do not enable KVBM feature, ensure compatibility with lower glibc
        uv run --python 3.11 maturin build --release --out /workspace/dist && \
        uv run --python 3.10 maturin build --release --out /workspace/dist; \
    fi

#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX

# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/metrics /usr/local/bin

# Install the cp312 runtime wheel and the pure-Python wheel built in wheel_builder.
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

RUN uv pip install /workspace/benchmarks

# Copy launch banner
# Lines starting with "# " are stripped from the message before it is stored.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

########################################
########## Development Image ###########
########################################
# Same contents as ci_minimum; only the entrypoint and default command are set.
FROM ci_minimum AS dev

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Install build-essential and python3-dev as apt dependencies
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        python3-dev \
        # JIT Kernel Compilation, flashinfer
        ninja-build \
        g++ \
        cuda-toolkit-12-8 && \
    rm -rf /var/lib/apt/lists/*

### COPY NATS & ETCD ###
# Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
ENV PATH=/usr/local/bin/etcd/:$PATH

# Copy UCX from base image as plugin for NIXL
# Copy the NIXL install prefix from wheel_builder image
# Copy dynamo wheels for gitlab artifacts
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/

# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
# COPY is a Dockerfile instruction and cannot run inside a shell `if`, and ARCH
# is not declared in this stage. Bind-mount /opt from the base stage instead and
# copy /opt/vllm only when it is present there.
# NOTE(review): the base stage passes --installation-dir /opt to install_vllm.sh
# only on arm64, so /opt/vllm is presumably absent on other architectures - confirm.
RUN --mount=type=bind,from=base,source=/opt,target=/mnt/base_opt \
    if [ -d /mnt/base_opt/vllm ]; then \
        cp -a /mnt/base_opt/vllm /opt/vllm; \
    fi

ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH

# Copy metrics binary from the ci_minimum image, not part of ai-dynamo wheel
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics

# Copy entire venv
# Theres a lot of stuff we'd have to re-compile (for arm64)
# TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment
COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Once UX refactor is merged
# Python components will have been pip installed and packaged in wheel
# Can remove these files
COPY components/ /workspace/components/
COPY tests/ /workspace/tests/
COPY examples/ /workspace/examples/
COPY deploy/ /workspace/deploy/
COPY benchmarks/ /workspace/benchmarks/

# Copy launch banner
# Lines starting with "# " are stripped from the message; interactive shells
# then print the banner and activate the virtualenv via ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []