Dockerfile.vllm 15.1 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
5
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
6
ARG RELEASE_BUILD
7
8
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
9
ARG MANYLINUX_IMAGE="quay.io/pypa/manylinux_2_28_x86_64"
10
11
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"

12
13
14
15
16
17
18
19
20
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .

21
22
23
24
25
##################################
########## Build Image ###########
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
26
27
28

USER root

29
30
31
32
33
34
35
36
### NIXL SETUP ###

ARG MOFED_VERSION=24.10-1.1.4.0
ARG PYTHON_VERSION=3.12
ARG NSYS_URL=https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_1/
ARG NSYS_PKG=NsightSystems-linux-cli-public-2025.1.1.131-3554042.deb

RUN apt-get update -y && apt-get -y install curl \
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
    git \
    libnuma-dev \
    numactl \
    wget \
    autotools-dev \
    automake \
    libtool \
    libz-dev \
    libiberty-dev \
    flex \
    build-essential \
    cmake \
    libibverbs-dev \
    libgoogle-glog-dev \
    libgtest-dev \
    libjsoncpp-dev \
    libpython3-dev \
    libboost-all-dev \
    libssl-dev \
    libgrpc-dev \
    libgrpc++-dev \
    libprotobuf-dev \
59
    libclang-dev \
60
61
62
63
64
65
66
67
68
69
70
71
72
73
    protobuf-compiler-grpc \
    pybind11-dev \
    python3-full \
    python3-pip \
    python3-numpy \
    etcd-server \
    net-tools \
    pciutils \
    libpci-dev \
    vim \
    tmux \
    screen \
    ibverbs-utils \
    libibmad-dev
74
75
76
77
78

RUN apt-get install -y linux-tools-common linux-tools-generic ethtool iproute2
RUN apt-get install -y dkms linux-headers-generic
RUN apt-get install -y meson ninja-build uuid-dev gdb

79
80
81
82
RUN apt install -y libglib2.0-0
RUN wget ${NSYS_URL}${NSYS_PKG} &&\
    apt install -y ./${NSYS_PKG} &&\
    rm ${NSYS_PKG}
83
84
85
86
87
88

RUN cd /usr/local/src && \
    curl -fSsL "https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-ubuntu24.04-x86_64.tgz" -o mofed.tgz && \
    tar -xf /usr/local/src/mofed.tgz && \
    cd MLNX_OFED_LINUX-* && \
    apt-get update && apt-get install -y --no-install-recommends \
89
    ./DEBS/libibverbs* ./DEBS/ibverbs-providers* ./DEBS/librdmacm* ./DEBS/libibumad* && \
90
    rm -rf /var/lib/apt/lists/* /usr/local/src/* mofed.tgz
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64 \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64

ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib

WORKDIR /workspace
RUN git clone https://github.com/NVIDIA/gdrcopy.git
RUN PREFIX=/usr/local DESTLIB=/usr/local/lib make -C /workspace/gdrcopy lib_install
RUN cp gdrcopy/src/libgdrapi.so.2.* /usr/lib/x86_64-linux-gnu/
RUN ldconfig

ARG UCX_VERSION=v1.18.0

RUN cd /usr/local/src && \
    curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz && \
    cd openucx-ucx* && \
    ./autogen.sh && ./configure     \
110
111
112
113
114
115
116
117
118
119
120
121
    --enable-shared             \
    --disable-static            \
    --disable-doxygen-doc       \
    --enable-optimizations      \
    --enable-cma                \
    --enable-devel-headers      \
    --with-cuda=/usr/local/cuda \
    --with-verbs                \
    --with-dm                   \
    --with-gdrcopy=/usr/local   \
    --enable-mt                 \
    --with-mlx5-dv &&           \
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
    make -j &&                      \
    make -j install-strip &&        \
    ldconfig

ENV LD_LIBRARY_PATH=/usr/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

ENV LD_LIBRARY_PATH=/usr/local/ompi/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/local/ompi/include:$CPATH
ENV PATH=/usr/local/ompi/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/local/ompi/lib/pkgconfig:$PKG_CONFIG_PATH

139
140
141
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
RUN cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl && \
    cd build/ && \
    ninja && \
    ninja install

ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH
ENV UCX_TLS=^cuda_ipc
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins

RUN ls -l /usr/local/nixl/
RUN ls -l /usr/local/nixl/include/

RUN ls /opt/nixl

159
160
161
# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
162
163
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && rm nats-server-v2.10.24-amd64.deb
164
165
166
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
167
168
169
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
170
171
172
173
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###
174
175
176

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
Neelay Shah's avatar
Neelay Shah committed
177
178
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12
179
180

# Activate virtual environment
Neelay Shah's avatar
Neelay Shah committed
181
ENV VIRTUAL_ENV=/opt/dynamo/venv
182
183
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

184
185
186
187
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

188
189
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
190
191
ARG VLLM_REF="0.7.2"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
192
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
193
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
194
195
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
196
    python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
197
198
    cd /tmp/vllm && \
    wheel unpack *.whl && \
199
    cd vllm-${VLLM_REF}/ && \
200
    patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
201
    # Rename the package from vllm to ai_dynamo_vllm
202
203
    mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_REF}.dist-info && \
    sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_REF}.dist-info/METADATA && \
204
    # Update wheel tag from linux_x86_64 to manylinux1_x86_64 in WHEEL file
205
    sed -i 's/Tag: cp38-abi3-linux_x86_64/Tag: cp38-abi3-manylinux1_x86_64/g' ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_REF}.dist-info/WHEEL && \
206
    # Also update the tag in RECORD file to match
207
    sed -i "s/-cp38-abi3-linux_x86_64.whl/-cp38-abi3-manylinux1_x86_64.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_REF}.dist-info/RECORD && \
208
209
    mkdir -p /workspace/dist && \
    wheel pack . --dest-dir /workspace/dist && \
210
    uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl
211

212
213
214
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt
215

216
# ### MISC UTILITY SETUP ###
217
218
219

# Finish pyright install
RUN pyright --help > /dev/null 2>&1
220

221
222
223
224
225
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

226
227
228
229
### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
230
    apt install --no-install-recommends -y \
231
    build-essential \
Biswa Panda's avatar
Biswa Panda committed
232
    protobuf-compiler \
Neelay Shah's avatar
Neelay Shah committed
233
234
    cmake \
    libssl-dev \
235
236
237
238
239
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
240
    RUST_VERSION=1.86.0 \
241
242
243
244
245
246
247
248
    RUSTARCH=x86_64-unknown-linux-gnu

RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
249
250
251
252

# Working directory
WORKDIR /workspace

253
254
255
256
# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
257
258
259
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
260

261
COPY lib/ /workspace/lib/
262
263
COPY components /workspace/components
COPY launch /workspace/launch
264

265
ARG CARGO_BUILD_JOBS
266
267
268
269
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
270
271
272
273

ENV CARGO_TARGET_DIR=/workspace/target

RUN cargo build --release --locked --features mistralrs,sglang,vllm,python && \
274
275
276
277
    cargo doc --no-deps && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
278
279
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin
280

Neelay Shah's avatar
Neelay Shah committed
281
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
282
COPY deploy/dynamo/api-store /workspace/deploy/dynamo/api-store
Neelay Shah's avatar
Neelay Shah committed
283

Neelay Shah's avatar
Neelay Shah committed
284
285
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
286

287
288
289
290
291
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

292
293
CMD []

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
###################################
####### WHEEL BUILD STAGE #########
###################################

# Build the wheel in the manylinux environment
FROM ${MANYLINUX_IMAGE} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
WORKDIR /workspace

RUN yum install -y  protobuf-compiler \
  && yum clean all \
  && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    CARGO_TARGET_DIR=/workspace/target

COPY --from=build /workspace /workspace
COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME

# Copy uv from build and build wheel in virtualenv
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV PATH="$PATH:/usr/local/bin"

# Build dynamo wheel
RUN source /opt/dynamo/venv/bin/activate && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi && \
    cd /workspace && \
    uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/deploy/dynamo/api-store && \
    uv build --wheel --out-dir /workspace/dist

344
345
346
347
348
#######################################
########## CI Minimum Image ###########
#######################################
FROM build AS ci_minimum

349
COPY . /workspace
350
351
352
353
354
355
356
357
358
359
360
361
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl && \
    uv pip install /workspace/dist/ai_dynamo_store*any.whl
362

363
364
365
366
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
367

368
369
370
371
372
373
374
375
376
377
378
379
380
ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin &&  \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
381

382
########################################
383
########## Development Image ###########
384
########################################
385
FROM ci_minimum AS dev
386
387
388

ARG GENAI_PERF_TAG

389
390
391
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"
392
393
394
395
396
397
398
399
400
401
402
403
404
405

# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################
406
407
408
409

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
410
ENV DYNAMO_HOME=/workspace
411
412
ENV VIRTUAL_ENV=/opt/dynamo/venv

413
# Copy NIXL
414
COPY --from=build /usr/local/nixl /usr/local/nixl
415
416
417
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH

418
419
# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
420
421
422
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
423
    uv venv $VIRTUAL_ENV --python 3.12 && \
424
425
426
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels
427
428
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse  && \
429
    rm -r wheelhouse
430
431
432

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
433

434
435
436
437
438
439
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
440
COPY ./examples examples/
441

442
443
ENTRYPOINT [ "/usr/bin/bash" ]
CMD []