# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Global build arguments. An ARG declared before the first FROM is only
# visible to FROM lines; stages that need the value redeclare it
# (RELEASE_BUILD in wheel_builder, GENAI_PERF_TAG in perf_analyzer and dev).

# Base image for the nixl_base, build and perf_analyzer stages.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

ARG RELEASE_BUILD

# Base image for the runtime stage.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Base image for the wheel_builder stage.
ARG MANYLINUX_IMAGE="quay.io/pypa/manylinux_2_28_x86_64"

# perf_analyzer revision checked out in perf_analyzer and installed in dev.
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# The NIXL commit is written into a file below; that layer is the cache hint
# and only changes when NIXL_COMMIT changes.
ARG NIXL_COMMIT
WORKDIR /opt/nixl
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Bring in the NIXL sources from the build source named "nixl"
# (presumably a named build context passed by the build script - confirm).
COPY --from=nixl . .

##################################
########## Build Image ###########
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build

# The package installs and system-wide builds in this stage run as root.
USER root

### NIXL SETUP ###

# Versions and download locations consumed by the RUN steps of this stage.
ARG MOFED_VERSION=24.10-1.1.4.0
# NOTE(review): PYTHON_VERSION is not referenced anywhere in the visible part
# of this file (the virtualenvs use a literal 3.12) - confirm it is still needed.
ARG PYTHON_VERSION=3.12
ARG NSYS_URL=https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_1/
ARG NSYS_PKG=NsightSystems-linux-cli-public-2025.1.1.131-3554042.deb

# Toolchain, RDMA/network utilities and development libraries for this stage.
# `apt-get update` and `apt-get install` share one layer so the install never
# runs against a stale package index, and the index is removed in that same
# layer. The package set is unchanged; it is only sorted for easier diffing.
RUN apt-get update -y && \
    apt-get install -y \
        automake \
        autotools-dev \
        build-essential \
        cmake \
        curl \
        dkms \
        etcd-server \
        ethtool \
        flex \
        gdb \
        git \
        ibverbs-utils \
        iproute2 \
        libboost-all-dev \
        libclang-dev \
        libgoogle-glog-dev \
        libgrpc++-dev \
        libgrpc-dev \
        libgtest-dev \
        libiberty-dev \
        libibmad-dev \
        libibverbs-dev \
        libjsoncpp-dev \
        libnuma-dev \
        libpci-dev \
        libprotobuf-dev \
        libpython3-dev \
        libssl-dev \
        libtool \
        libz-dev \
        linux-headers-generic \
        linux-tools-common \
        linux-tools-generic \
        meson \
        net-tools \
        ninja-build \
        numactl \
        pciutils \
        protobuf-compiler-grpc \
        pybind11-dev \
        python3-full \
        python3-numpy \
        python3-pip \
        screen \
        tmux \
        uuid-dev \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Nsight Systems CLI: download the .deb named by NSYS_URL/NSYS_PKG, install it
# (apt resolves its dependencies from the freshly updated index) and remove
# the downloaded package and the index in the same layer.
RUN apt-get update -y && \
    apt-get install -y libglib2.0-0 && \
    wget "${NSYS_URL}${NSYS_PKG}" && \
    apt-get install -y "./${NSYS_PKG}" && \
    rm "${NSYS_PKG}" && \
    rm -rf /var/lib/apt/lists/*

# Install the user-space RDMA libraries shipped in the MLNX_OFED bundle.
# Download, extraction, install and clean-up run in one layer, so neither the
# tarball nor the extracted tree is kept in the image.
RUN cd /usr/local/src && \
    curl -fSsL "https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-ubuntu24.04-x86_64.tgz" -o mofed.tgz && \
    tar -xf /usr/local/src/mofed.tgz && \
    cd MLNX_OFED_LINUX-* && \
    apt-get update && apt-get install -y --no-install-recommends \
        ./DEBS/libibverbs* ./DEBS/ibverbs-providers* ./DEBS/librdmacm* ./DEBS/libibumad* && \
    # /usr/local/src/* covers both mofed.tgz and the extracted directory.
    rm -rf /var/lib/apt/lists/* /usr/local/src/*

# Append the CUDA library directory and /usr/local/lib to the build-time
# (LIBRARY_PATH) and run-time (LD_LIBRARY_PATH) library search paths.
ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib

# Clone gdrcopy, install its library under /usr/local, copy the versioned
# shared object next to the system libraries and refresh the linker cache.
WORKDIR /workspace
RUN git clone https://github.com/NVIDIA/gdrcopy.git && \
    PREFIX=/usr/local DESTLIB=/usr/local/lib make -C /workspace/gdrcopy lib_install && \
    cp gdrcopy/src/libgdrapi.so.2.* /usr/lib/x86_64-linux-gnu/ && \
    ldconfig

ARG UCX_VERSION=v1.18.0

# Build UCX from the source tarball of ${UCX_VERSION} with CUDA, verbs and
# gdrcopy support (gdrcopy is looked up under /usr/local).
# make runs with one job per CPU: a bare `make -j` places no limit on the
# number of parallel jobs and can exhaust memory on the build host.
RUN cd /usr/local/src && \
    curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz && \
    cd openucx-ucx* && \
    ./autogen.sh && \
    ./configure \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --enable-mt \
        --with-mlx5-dv && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig

# Prepend the system directories and, in front of them, the /usr/local/ompi
# prefix to the library, header, executable and pkg-config search paths.
# NOTE(review): nothing in the visible part of this file installs into
# /usr/local/ompi - confirm these entries are still required.
ENV LD_LIBRARY_PATH=/usr/local/ompi/lib:/usr/lib:$LD_LIBRARY_PATH \
    CPATH=/usr/local/ompi/include:/usr/include:$CPATH \
    PATH=/usr/local/ompi/bin:/usr/bin:$PATH \
    PKG_CONFIG_PATH=/usr/local/ompi/lib/pkgconfig:/usr/lib/pkgconfig:$PKG_CONFIG_PATH

# Later RUN instructions of this stage are executed by bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

# Copy nixl source, and use commit hash as cache hint
# /opt/nixl in the nixl_base stage already contains commit.txt, so this single
# COPY carries both the sources and the commit marker file.
COPY --from=nixl_base /opt/nixl /opt/nixl

# Configure, build and install NIXL with meson/ninja under /usr/local/nixl.
RUN cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl && \
    cd build/ && \
    ninja && \
    ninja install

# Expose the installed NIXL libraries, Python packages and plugin directory.
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH
ENV UCX_TLS=^cuda_ipc
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins

# Print the install and source trees to the build log; the step fails if any
# of the directories is missing.
RUN ls -l /usr/local/nixl/ && \
    ls -l /usr/local/nixl/include/ && \
    ls /opt/nixl

# Install utilities
RUN apt-get update -y && \
    apt-get install -y curl git nvtop tmux vim wget && \
    rm -rf /var/lib/apt/lists/*

# nats: install the released .deb and remove the download in the same layer
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && \
    rm nats-server-v2.10.24-amd64.deb

# etcd: unpack the release archive into /usr/local/bin/etcd and put that
# directory on PATH
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
# NOTE(review): the uv image is referenced through the floating "latest" tag,
# so rebuilds may pick up a different uv release; consider pinning a version
# or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Common dependencies (the requirements file is bind-mounted for this step
# only and is not copied into the image)
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.7.2"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.7.2.post1"
# Steps: download the upstream binary wheel, unpack it, apply the patch from
# container/deps/vllm, rename the distribution, repack it into /workspace/dist
# and install the result into the virtualenv.
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
    cd /tmp/vllm && \
    wheel unpack *.whl && \
    cd vllm-${VLLM_REF}/ && \
    patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
    # Rename the package from vllm to ai_dynamo_vllm
    mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
    sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
    sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
    # Update wheel tag from linux_x86_64 to manylinux1_x86_64 in WHEEL file
    sed -i 's/Tag: cp38-abi3-linux_x86_64/Tag: cp38-abi3-manylinux1_x86_64/g' ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
    # Also update the tag in RECORD file to match
    sed -i "s/-cp38-abi3-linux_x86_64.whl/-cp38-abi3-manylinux1_x86_64.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
    mkdir -p /workspace/dist && \
    wheel pack . --dest-dir /workspace/dist && \
    uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl && \
    # The repacked wheel lives in /workspace/dist; drop the downloaded wheel
    # and the unpacked tree in this same layer so they do not stay in the image
    cd /workspace && \
    rm -rf /tmp/vllm

# Install test dependencies (requirements file bind-mounted for this step only)
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# ### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Make /bin/sh point at bash
RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y \
        build-essential \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Install locations and version of the Rust toolchain.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0 \
    RUSTARCH=x86_64-unknown-linux-gnu

# Download rustup-init 1.28.1, verify it against the pinned SHA-256 before
# running it, install the toolchain, then make both homes writable by any user.
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/

# Copy the Rust workspace manifests and sources
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/

COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

ENV CARGO_TARGET_DIR=/workspace/target

# Build the release binaries and the docs, then install the executables into
# /usr/local/bin.
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python && \
    cargo doc --no-deps && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
COPY deploy/dynamo/api-store /workspace/deploy/dynamo/api-store

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner: strip the lines starting with "# " from the message
# file and print the result from ~/.bashrc
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

CMD []

###################################
####### WHEEL BUILD STAGE #########
###################################

# Build the wheel in the manylinux environment
FROM ${MANYLINUX_IMAGE} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
WORKDIR /workspace

# Install protoc. `&&` and `||` have equal precedence and group left to right:
# if the update or the repository install fails, the two AlmaLinux 8.10 RPMs
# are installed instead; the cache clean-up runs after whichever path succeeded.
RUN yum update -y \
    && yum install -y protobuf-compiler \
    || yum install -y https://raw.repo.almalinux.org/almalinux/8.10/AppStream/x86_64/os/Packages/protobuf-3.5.0-15.el8.x86_64.rpm \
    https://raw.repo.almalinux.org/almalinux/8.10/AppStream/x86_64/os/Packages/protobuf-compiler-3.5.0-15.el8.x86_64.rpm \
    && yum clean all \
    && rm -rf /var/cache/yum

# Same Rust locations as in the build stage, so the copied toolchain and
# target directory are picked up unchanged.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    CARGO_TARGET_DIR=/workspace/target

# Reuse the workspace (sources and build output) and the Rust toolchain that
# the build stage produced.
COPY --from=build /workspace /workspace
COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME

# Create the virtualenv the wheels are built in.
# NOTE(review): no instruction in this stage copies uv; it is presumably
# provided by the manylinux image - confirm.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment; /usr/local/bin is appended to PATH as well
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}:/usr/local/bin"

# Build dynamo wheel: the Python bindings (3.12 always, 3.11 and 3.10 as well
# for release builds), then the top-level package and the api-store package.
RUN source /opt/dynamo/venv/bin/activate && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        for extra_python in 3.11 3.10; do \
            uv build --wheel --out-dir /workspace/dist --python "${extra_python}" || exit 1; \
        done; \
    fi && \
    cd /workspace && \
    uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/deploy/dynamo/api-store && \
    uv build --wheel --out-dir /workspace/dist

#######################################
########## CI Minimum Image ###########
#######################################
FROM build AS ci_minimum

# Full build context, followed by the wheels produced in wheel_builder.
COPY . /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/

# Package the bindings: cp312 wheels, the C API shared library and its headers
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

# Install the runtime, top-level and api-store wheels into the virtualenv
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl && \
    uv pip install /workspace/dist/ai_dynamo_store*any.whl

##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
# Clone and pin the checkout to GENAI_PERF_TAG as one step
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git && \
    git -C perf_analyzer checkout ${GENAI_PERF_TAG}
# Configure and compile with the OpenAI client enabled
RUN mkdir perf_analyzer/build && \
    cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON && \
    cmake --build perf_analyzer/build -- -j8
# Collect the build output under /workspace/bin for the dev stage to copy
RUN mkdir bin &&  \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/

########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

# Perf Analyzer build output and source checkout from the perf_analyzer stage
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking (same revision as the Perf Analyzer
# build), then remove the tritonclient package from the environment
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv

# Copy NIXL
COPY --from=build /usr/local/nixl /usr/local/nixl
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH

# Setup the python environment
# NOTE(review): the uv image is referenced through the floating "latest" tag;
# consider pinning a version or digest for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels. The requirement is quoted so the shell never treats the
# [vllm] extras as a glob pattern.
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install "ai-dynamo[vllm]" --find-links wheelhouse && \
    rm -r wheelhouse

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing.
# No other step of this stage creates /opt/dynamo/bindings, so the library is
# copied from the build stage's cargo output to the path the variable names.
COPY --from=build /workspace/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner: strip the lines starting with "# " from the message
# file and print the result from ~/.bashrc
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []