# syntax=docker/dockerfile:1

# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Build-stage base image (CUDA devel toolchain)
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# Runtime-stage base image (CUDA runtime only, keeps the final image small)
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Staging area for the nixl source tree; NIXL_COMMIT serves only as a cache key.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
# NOTE(review): "nixl" is presumably an additional build context supplied via
# `docker build --build-context nixl=...` — confirm against the build scripts.
COPY --from=nixl . .

# Main build stage
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

USER root

### NIXL SETUP ###

ARG MOFED_VERSION=24.10-1.1.4.0
ARG PYTHON_VERSION=3.12
ARG NSYS_URL=https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_1/
ARG NSYS_PKG=NsightSystems-linux-cli-public-2025.1.1.131-3554042.deb
# Core build/runtime dependencies for nixl, UCX, and general development.
# All packages are installed in the same layer as `apt-get update` so the
# install never runs against a stale package-lists cache (hadolint DL3009).
RUN apt-get update -y && apt-get -y install \
    curl \
    git \
    libnuma-dev \
    numactl \
    wget \
    autotools-dev \
    automake \
    libtool \
    libz-dev \
    libiberty-dev \
    flex \
    build-essential \
    cmake \
    libibverbs-dev \
    libgoogle-glog-dev \
    libgtest-dev \
    libjsoncpp-dev \
    libpython3-dev \
    libboost-all-dev \
    libssl-dev \
    libgrpc-dev \
    libgrpc++-dev \
    libprotobuf-dev \
    protobuf-compiler-grpc \
    pybind11-dev \
    python3-full \
    python3-pip \
    python3-numpy \
    etcd-server \
    net-tools \
    pciutils \
    libpci-dev \
    vim \
    tmux \
    screen \
    ibverbs-utils \
    libibmad-dev \
    linux-tools-common \
    linux-tools-generic \
    ethtool \
    iproute2 \
    dkms \
    linux-headers-generic \
    meson \
    ninja-build \
    uuid-dev \
    gdb

# Install the Nsight Systems CLI for profiling.
# Use apt-get (not apt) in scripts — apt's CLI is not stable (hadolint DL3027).
RUN apt-get install -y libglib2.0-0
RUN wget ${NSYS_URL}${NSYS_PKG} && \
    apt-get install -y ./${NSYS_PKG} && \
    rm ${NSYS_PKG}

# Install the MOFED userspace RDMA libraries (verbs/rdmacm/umad) from the
# Mellanox OFED bundle; clean sources and apt lists in the same layer.
RUN cd /usr/local/src && \
    curl -fSsL "https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-ubuntu24.04-x86_64.tgz" -o mofed.tgz && \
    tar -xf /usr/local/src/mofed.tgz && \
    cd MLNX_OFED_LINUX-* && \
    apt-get update && apt-get install -y --no-install-recommends \
        ./DEBS/libibverbs* ./DEBS/ibverbs-providers* ./DEBS/librdmacm* ./DEBS/libibumad* && \
    rm -rf /var/lib/apt/lists/* /usr/local/src/*

# Build and install Perf Analyzer for benchmarking.
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN apt-get update -y && apt-get -y install rapidjson-dev zlib1g-dev
# Clone to a fixed absolute path so the PATH entry below is valid regardless of
# the base image's default working directory.
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git /workspace/perf_analyzer && \
    git -C /workspace/perf_analyzer checkout ${GENAI_PERF_TAG} && \
    cmake -B /workspace/perf_analyzer/build -S /workspace/perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON && \
    cmake --build /workspace/perf_analyzer/build -- -j8
# NOTE: ENV performs no shell command substitution, so the original
# `ENV PATH="$(pwd)/..."` stored the literal string "$(pwd)" and the binary
# was never actually on PATH. Use the absolute build location instead.
ENV PATH="/workspace/perf_analyzer/build/perf_analyzer/src/perf-analyzer-build:${PATH}"
# Make CUDA and locally-installed libraries visible to the compile-time linker
# and the runtime loader (single ENV yields the same final search order as the
# original two consecutive appends).
ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib

WORKDIR /workspace
# Build and install gdrcopy (NVIDIA GPUDirect copy library) into /usr/local.
RUN git clone https://github.com/NVIDIA/gdrcopy.git
RUN PREFIX=/usr/local DESTLIB=/usr/local/lib make -C /workspace/gdrcopy lib_install
# Also place the runtime .so on the default multiarch loader path, then refresh
# the linker cache so later builds (UCX --with-gdrcopy) can resolve it.
RUN cp gdrcopy/src/libgdrapi.so.2.* /usr/lib/x86_64-linux-gnu/
RUN ldconfig

ARG UCX_VERSION=v1.18.0

# Build UCX from source with CUDA, verbs, and gdrcopy support; install-strip
# keeps the installed binaries small.
RUN cd /usr/local/src && \
    curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz && \
    cd openucx-ucx* && \
    ./autogen.sh && ./configure     \
    --enable-shared             \
    --disable-static            \
    --disable-doxygen-doc       \
    --enable-optimizations      \
    --enable-cma                \
    --enable-devel-headers      \
    --with-cuda=/usr/local/cuda \
    --with-verbs                \
    --with-dm                   \
    --with-gdrcopy=/usr/local   \
    --enable-mt                 \
    --with-mlx5-dv &&           \
    make -j &&                      \
    make -j install-strip &&        \
    ldconfig

ENV LD_LIBRARY_PATH=/usr/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
# Use bash with pipefail so piped RUN commands (e.g. the `curl | bash` rustup
# install below) fail the build instead of silently succeeding (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /workspace

# Open MPI paths — NOTE(review): /usr/local/ompi is presumably provided by the
# cuda-dl-base image; confirm if the base image is changed.
ENV LD_LIBRARY_PATH=/usr/local/ompi/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/local/ompi/include:$CPATH
ENV PATH=/usr/local/ompi/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/local/ompi/lib/pkgconfig:$PKG_CONFIG_PATH

# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
# commit.txt is already inside /opt/nixl; re-copying it is harmless and keeps
# an explicit commit-keyed layer in the history.
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Build and install nixl from source with meson/ninja.
RUN cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl && \
    cd build/ && \
    ninja && \
    ninja install

ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH
# Exclude the cuda_ipc transport from UCX's transport selection ("^" negates).
ENV UCX_TLS=^cuda_ipc
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins

# Sanity-check the install layout in one layer — `ls` exits non-zero on a
# missing path, failing the build early (replaces four debug RUN layers).
RUN ls -l /usr/local/nixl/ /usr/local/nixl/include/ /usr/local/nixl/include/internal/ /opt/nixl

# Install utilities (apt-get, not apt — hadolint DL3027)
RUN apt-get update -y && apt-get install -y git wget curl nvtop tmux vim

# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && \
    rm nats-server-v2.10.24-amd64.deb

# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12
# Activate virtual environment by putting its bin dir first on PATH.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Common dependencies — bind-mounted so the requirements file never lands in a layer.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynamo-kv-disagg-patch.patch"
# Download the official vllm wheel, unpack it, apply the dynamo KV-disagg
# patch, suffix the version with ".dynamo_patch" so the patched build is
# identifiable at runtime, then repack and install the wheel.
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==${VLLM_REF} && \
    cd /tmp/vllm && \
    wheel unpack *.whl && \
    cd vllm-*/ && \
    patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
    sed -i "s/__version__ = version = '\(.*\)'/__version__ = version = '\1.dynamo_patch'/g; s/__version_tuple__ = version_tuple = (\(.*\))/__version_tuple__ = version_tuple = (\1, 'dynamo_patch')/g" vllm/_version.py && \
    mkdir -p /workspace/dist && \
    wheel pack . --dest-dir /workspace/dist && \
    uv pip install /workspace/dist/vllm-*.whl

# Install genai-perf for benchmarking (pinned to the same tag as the
# perf_analyzer source checkout above).
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# genai-perf pulls in tritonclient, which is not wanted in this image.
RUN uv pip uninstall tritonclient

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt
# ### MISC UTILITY SETUP ###

# Run pyright once so any one-time setup happens at build time, not first use.
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Make /bin/sh point at bash so `source` etc. work in RUN shell-form commands.
RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies (apt-get, not apt — hadolint DL3027)
RUN apt-get update -y && \
    apt-get install -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config && \
    curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
# Pin the toolchain used for the workspace build below.
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
# Cargo workspace manifests (copied before source for better layer caching)
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/

COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch

# Optional parallelism override for cargo (unset = cargo's default)
ARG CARGO_BUILD_JOBS

ENV CARGO_TARGET_DIR=/workspace/target
# Build the Rust workspace (locked deps, all engine features), generate docs,
# and install the release binaries onto PATH.
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python && \
    cargo doc --no-deps && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin

COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Build dynamo wheel (runtime bindings first, then the top-level package).
# `source` works here because SHELL is bash for this stage.
RUN source /opt/dynamo/venv/bin/activate && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    cd /workspace && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Package the bindings (wheel, C API shared library, and C headers) under
# /opt/dynamo/bindings for downstream stages/consumers.
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

########################################
########## RUNTIME CONTAINER ###########
########################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv

RUN rm /bin/sh && ln -s /bin/bash /bin/sh

# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# NOTE(review): `find / -name libpython3.12.so*` could match more than one file;
# works today but a pinned path would be more robust — confirm before changing.
RUN uv venv $VIRTUAL_ENV --python 3.12 && \
    ln -s $(find / -name libpython3.12.so*) /usr/lib && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels built in the dev stage, then drop the wheel files.
COPY --from=dev /workspace/dist/*.whl whls/
RUN uv pip install $(find whls -name ai_dynamo_runtime-*.whl) && \
    uv pip install $(find whls -name ai_dynamo-*.whl) && \
    uv pip install $(find whls -name vllm-*.whl) && \
    rm -r whls

# Bring the Dynamo LLM C API library into this image: VLLM_KV_CAPI_PATH below
# references it, but no other instruction in this stage copies it from dev.
COPY --from=dev /opt/dynamo/bindings/lib/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/libdynamo_llm_capi.so

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# dynamo run in=text out=mistralrs Qwen/Qwen2.5-3B-Instruct
CMD []