# Dockerfile.vllm
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
5
# CUDA development image used by the nixl_base, base and perf_analyzer stages.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

# Image the `runtime` stage starts from.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Image the `wheel_builder` stage starts from.
ARG MANYLINUX_IMAGE="quay.io/pypa/manylinux_2_28_x86_64"

# Redeclared in the wheel_builder stage, where the value "true" enables the
# additional Python 3.10 / 3.11 wheels.
ARG RELEASE_BUILD

# TODO: Move to published pypi tags
# Git ref of triton-inference-server/perf_analyzer used by the perf_analyzer
# and dev stages.
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
12

13
14
15
16
17
18
19
20
21
# Stage whose only job is to hold the NIXL source tree under /opt/nixl.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# The commit id is written to a file so this layer acts as a cache key: it is
# rebuilt only when NIXL_COMMIT changes.
ARG NIXL_COMMIT
WORKDIR /opt/nixl
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt

# Copy the NIXL sources into /opt/nixl. `nixl` is not a stage defined in this
# file, so it is presumably a named build context supplied by the caller --
# TODO confirm against the build script.
COPY --from=nixl . .

22
##################################
23
########## Base Image ############
24
25
##################################

26
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# The package installation steps that follow run as root.
USER root

### NIXL SETUP ###

# MLNX_OFED release downloaded further down in this stage.
ARG MOFED_VERSION=24.10-1.1.4.0
ARG PYTHON_VERSION=3.12
# The Nsight Systems CLI package is fetched from ${NSYS_URL}${NSYS_PKG}.
ARG NSYS_URL=https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_1/
ARG NSYS_PKG=NsightSystems-linux-cli-public-2025.1.1.131-3554042.deb
# OS packages: build toolchain and development headers, RDMA / networking
# utilities, kernel tooling, meson/ninja and a few interactive tools.
#
# `apt-get update` and `apt-get install` run in the same layer so the install
# never uses a package index cached by an older layer, and the index is
# removed in that same layer so it is not stored in the image.
# Recommended packages are still installed (no --no-install-recommends) so
# the resulting package set stays the same as before.
RUN apt-get update -y && \
    apt-get install -y \
        automake \
        autotools-dev \
        build-essential \
        cmake \
        curl \
        dkms \
        etcd-server \
        ethtool \
        flex \
        gdb \
        git \
        ibverbs-utils \
        iproute2 \
        libboost-all-dev \
        libclang-dev \
        libglib2.0-0 \
        libgoogle-glog-dev \
        libgrpc++-dev \
        libgrpc-dev \
        libgtest-dev \
        libiberty-dev \
        libibmad-dev \
        libibverbs-dev \
        libjsoncpp-dev \
        libnuma-dev \
        libpci-dev \
        libprotobuf-dev \
        libpython3-dev \
        libssl-dev \
        libtool \
        libz-dev \
        linux-headers-generic \
        linux-tools-common \
        linux-tools-generic \
        meson \
        net-tools \
        ninja-build \
        numactl \
        pciutils \
        protobuf-compiler-grpc \
        pybind11-dev \
        python3-full \
        python3-numpy \
        python3-pip \
        screen \
        tmux \
        uuid-dev \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Nsight Systems CLI, installed from the downloaded .deb. The package index is
# refreshed here because the .deb's dependencies are resolved through apt, and
# both the .deb and the index are removed in the same layer.
RUN wget "${NSYS_URL}${NSYS_PKG}" && \
    apt-get update -y && \
    apt-get install -y "./${NSYS_PKG}" && \
    rm "${NSYS_PKG}" && \
    rm -rf /var/lib/apt/lists/*
84
85
86
87
88
89

# Download the MLNX_OFED bundle for ${MOFED_VERSION} and install only its
# libibverbs, ibverbs-providers, librdmacm and libibumad packages. The bundle
# and the apt package index are deleted in the same layer.
RUN set -e; \
    cd /usr/local/src; \
    curl -fSsL "https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-ubuntu24.04-x86_64.tgz" -o mofed.tgz; \
    tar -xf /usr/local/src/mofed.tgz; \
    cd MLNX_OFED_LINUX-*; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ./DEBS/libibverbs* \
        ./DEBS/ibverbs-providers* \
        ./DEBS/librdmacm* \
        ./DEBS/libibumad*; \
    rm -rf /var/lib/apt/lists/* /usr/local/src/* mofed.tgz
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

# Append the CUDA library directory and /usr/local/lib to the link-time and
# run-time library search paths.
ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib \
    LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib

WORKDIR /workspace

# Clone gdrcopy, install its library under /usr/local, copy the built
# libgdrapi.so.2.* into the system library directory as well, and refresh the
# dynamic linker cache.
RUN git clone https://github.com/NVIDIA/gdrcopy.git && \
    PREFIX=/usr/local DESTLIB=/usr/local/lib make -C /workspace/gdrcopy lib_install && \
    cp gdrcopy/src/libgdrapi.so.2.* /usr/lib/x86_64-linux-gnu/ && \
    ldconfig

# UCX git ref (tag) to build from source.
ARG UCX_VERSION=v1.18.0

# Build and install UCX with CUDA, verbs, mlx5 and gdrcopy support.
#  - The tarball is downloaded to a file and then extracted, rather than piped
#    into tar: the shell in effect here has no pipefail, so a failing curl on
#    the left-hand side of a pipe would not by itself fail the step.
#  - make is bounded with -j"$(nproc)"; a bare `make -j` places no limit on
#    the number of parallel jobs and can exhaust memory on the build host.
#  - The source/build tree is removed in the same layer so it is not stored
#    in the image.
RUN cd /usr/local/src && \
    curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" -o ucx.tar.gz && \
    tar -xzf ucx.tar.gz && \
    rm ucx.tar.gz && \
    cd openucx-ucx* && \
    ./autogen.sh && \
    ./configure \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --enable-mt \
        --with-mlx5-dv && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig && \
    cd /usr/local/src && \
    rm -rf openucx-ucx*

# All following RUN instructions are executed with bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

# Prepend the /usr and /usr/local/ompi locations to the library, header,
# executable and pkg-config search paths (ompi entries take precedence).
ENV LD_LIBRARY_PATH=/usr/local/ompi/lib:/usr/lib:$LD_LIBRARY_PATH \
    CPATH=/usr/local/ompi/include:/usr/include:$CPATH \
    PATH=/usr/local/ompi/bin:/usr/bin:$PATH \
    PKG_CONFIG_PATH=/usr/local/ompi/lib/pkgconfig:/usr/lib/pkgconfig:$PKG_CONFIG_PATH

140
141
142
# Bring the NIXL sources over from the nixl_base stage; commit.txt carries the
# commit id that serves as the cache hint.
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt

# Configure, build and install NIXL into /usr/local/nixl.
RUN cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl && \
    ninja -C build/ && \
    ninja -C build/ install

# Expose the installed NIXL libraries, Python packages and plugin directory.
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH \
    PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH \
    UCX_TLS=^cuda_ipc \
    NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins

# List the install and source directories in the build log; the step fails if
# any of them is missing.
RUN ls -l /usr/local/nixl/ && \
    ls -l /usr/local/nixl/include/ && \
    ls /opt/nixl

160
161
162
# Install utilities.
# apt-get is used instead of apt (apt's command-line interface is meant for
# interactive use), and the package index is removed in the same layer.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        git \
        nvtop \
        tmux \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# nats
# The release is a build argument so the version is written once instead of
# three times; the default is the release that was previously hard-coded.
ARG NATS_VERSION="v2.10.24"
RUN wget --tries=3 --waitretry=5 "https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-amd64.deb" && \
    dpkg -i "nats-server-${NATS_VERSION}-amd64.deb" && \
    rm "nats-server-${NATS_VERSION}-amd64.deb"

# etcd
# The release tarball is unpacked into /usr/local/bin/etcd (a directory),
# which is then put on PATH.
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###
175
176
177

# Take the uv / uvx binaries from the upstream uv image and use uv to create
# the Python 3.12 virtual environment under /opt/dynamo/venv.
# NOTE(review): the `latest` tag is not pinned, so the uv version can differ
# between builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv --python 3.12 /opt/dynamo/venv

# "Activate" the virtual environment for every later instruction by exporting
# VIRTUAL_ENV and putting its bin directory first on PATH.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

185
186
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes.
#
# Steps: download the upstream vllm wheel, unpack it, apply the patch from
# container/deps/vllm, rename the distribution to ${VLLM_PATCHED_PACKAGE_NAME},
# rewrite the wheel tag, repack into /workspace/dist and install the result.
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    # Name of the renamed .dist-info directory, used by every step below
    DIST_INFO="${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info" && \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
    cd /tmp/vllm && \
    wheel unpack *.whl && \
    cd vllm-${VLLM_REF}/ && \
    patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
    # Rename the package from vllm to ai_dynamo_vllm
    mv vllm-${VLLM_REF}.dist-info "${DIST_INFO}" && \
    sed -i \
        -e "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" \
        -e "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" \
        "${DIST_INFO}/METADATA" && \
    # Update wheel tag from linux_x86_64 to manylinux1_x86_64 in WHEEL file
    sed -i 's/Tag: cp38-abi3-linux_x86_64/Tag: cp38-abi3-manylinux1_x86_64/g' "${DIST_INFO}/WHEEL" && \
    # Also update the tag in RECORD file to match
    sed -i "s/-cp38-abi3-linux_x86_64.whl/-cp38-abi3-manylinux1_x86_64.whl/g" "${DIST_INFO}/RECORD" && \
    mkdir -p /workspace/dist && \
    wheel pack . --dest-dir /workspace/dist && \
    uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl
210

211
212
213
214
# Common dependencies (requirements file is bind-mounted, not copied into the
# image)
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt

# Test dependencies, installed the same way from requirements.test.txt
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt

# ### MISC UTILITY SETUP ###

# Finish pyright install by invoking it once with its output discarded
# (presumably this triggers pyright's first-run setup -- TODO confirm).
RUN pyright --help >/dev/null 2>&1
223

224
225
226
227
228
# Mark /workspace as a safe Git directory for root, and point /bin/sh at bash.
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig && \
    ln -sf /bin/bash /bin/sh

229
230
231
232
### BUILDS ###

# Rust build/dev dependencies.
# apt-get is used instead of apt (apt's command-line interface is meant for
# interactive use), and the package index is removed in the same layer so it
# is not stored in the image.
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y \
        build-essential \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Locations and versions for the Rust toolchain.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    RUST_VERSION=1.86.0 \
    RUSTARCH=x86_64-unknown-linux-gnu
ENV PATH=${CARGO_HOME}/bin:$PATH

# Download rustup-init 1.28.1, verify its SHA-256 checksum, install the
# pinned toolchain with the minimal profile, then make the rustup and cargo
# directories writable by all users.
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    sha256sum -c - <<< "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

# Default CARGO_BUILD_JOBS to 16 when the build argument is not provided.
# This keeps cargo from running $(nproc) jobs in parallel, which might exceed
# the limit on the number of open files.
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Uses the default `ubuntu` user and grants it passwordless sudo, so files
# written from inside the container are not owned by root.

# The user name is fixed because `ubuntu` is hardcoded in .devcontainer; only
# the uid and gid are configurable.
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

# Install sudo and gnupg, add the sudoers entry, make sure the home directory
# exists and is owned by the user, and set bash as the login shell.
RUN apt-get update && \
    apt-get install -y sudo gnupg2 gnupg1 && \
    echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME && \
    chmod 0440 /etc/sudoers.d/$USERNAME && \
    mkdir -p /home/$USERNAME && \
    chown -R $USERNAME:$USERNAME /home/$USERNAME && \
    rm -rf /var/lib/apt/lists/* && \
    chsh -s /bin/bash $USERNAME

# Re-copying the venv with --chown is slow (the original author measured
# about 40s) but far faster than `chown -R` over the venv (about 10min in the
# same measurement).
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
# Append the history settings to ~/.bashrc and create the history file.
RUN HISTORY_SETUP="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" && \
    mkdir -p $HOME/.commandhistory && \
    touch $HOME/.commandhistory/.bash_history && \
    echo "$HISTORY_SETUP" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

# Path where the debug build of the Dynamo LLM C API library is expected.
ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
305
##### Wheel Build Image ##########
306
307
##################################

308
309
# Build the wheel in the manylinux environment
FROM ${MANYLINUX_IMAGE} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

WORKDIR /workspace

# Install the Python 3.12 headers and protobuf-compiler.
# The fallback to the AlmaLinux RPMs is grouped with `{ ...; }` so that it only
# applies when `yum install protobuf-compiler` itself fails. `&&` and `||`
# have equal precedence and associate left to right, so without the grouping a
# failed `yum update` or a failed python3.12-devel install would also jump to
# the fallback and the step could succeed with those parts missing.
RUN yum update -y \
    && yum install -y python3.12-devel \
    && { yum install -y protobuf-compiler \
         || yum install -y \
              https://raw.repo.almalinux.org/almalinux/8.10/AppStream/x86_64/os/Packages/protobuf-3.5.0-15.el8.x86_64.rpm \
              https://raw.repo.almalinux.org/almalinux/8.10/AppStream/x86_64/os/Packages/protobuf-compiler-3.5.0-15.el8.x86_64.rpm; } \
    && yum clean all \
    && rm -rf /var/cache/yum
328
329
330

# Same toolchain / venv locations as in the base stage; cargo output goes to
# /workspace/target.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

# Reuse the Rust toolchain, the workspace contents and the Python virtual
# environment that were prepared in the base stage.
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml \
     README.md \
     LICENSE \
     Cargo.toml \
     Cargo.lock \
     rust-toolchain.toml \
     hatch_build.py \
     /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk

# Build Rust crate binaries packaged with the wheel.
# The http crate is selected by path because several crates named `http` are
# present among the dependencies.
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python \
    -p dynamo-run \
    -p llmctl \
    -p file://$PWD/components/http \
    -p metrics

# Build dynamo wheel: first the top-level project, then the Python bindings
# for 3.12, and for 3.11 and 3.10 as well when RELEASE_BUILD is "true".
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        for py in 3.11 3.10; do \
            uv build --wheel --out-dir /workspace/dist --python "$py" || exit 1; \
        done; \
    fi
371

372
373
374
#######################################
########## CI Minimum Image ###########
#######################################
375
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace \
    CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

# Wheels and compiled Rust artifacts produced by the wheel_builder stage.
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Full build context on top of the copied artifacts.
COPY . /workspace

# Build rest of the crates; the ones already built in wheel_builder are
# excluded.
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics
396
397
398
399
400
401

# Package the bindings: cp312 wheels, the C API shared library and its headers
# go under /opt/dynamo/bindings; the release binaries go to /usr/local/bin.
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
    cp target/release/dynamo-run \
       target/release/http \
       target/release/llmctl \
       target/release/metrics \
       target/release/mock_worker \
       /usr/local/bin

# Install the runtime wheel built for CPython 3.12, then the pure-Python wheel.
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner: lines starting with "# " are stripped from the message,
# and ~/.bashrc is extended to print the result.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc


418
419
420
421
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

# Git ref of perf_analyzer to build (default comes from the global ARG).
ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && \
    apt-get -y install \
        cmake \
        g++ \
        libssl-dev \
        python3 \
        rapidjson-dev \
        zlib1g-dev

# Fetch the sources at the requested ref.
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git && \
    git -C perf_analyzer checkout ${GENAI_PERF_TAG}

# Configure with the OpenAI client enabled and build with 8 parallel jobs.
RUN mkdir perf_analyzer/build && \
    cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON && \
    cmake --build perf_analyzer/build -- -j8

# Collect the build output under /workspace/bin for the dev stage to copy.
RUN mkdir bin && \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
436

437
########################################
438
########## Development Image ###########
439
########################################
440
FROM ci_minimum AS dev

# Git ref used for the genai-perf install below.
ARG GENAI_PERF_TAG

# Perf Analyzer build output and source tree from the perf_analyzer stage.
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking, then remove the tritonclient package.
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient

# Refresh /workspace with the full build context.
COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################
461
462
463
464

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv

# Copy NIXL
# NIXL is built and installed into /usr/local/nixl in the `base` stage. The
# previous `--from=build` named a stage that does not exist in this file, so
# the builder would have treated `build` as an external image or build
# context.
COPY --from=base /usr/local/nixl /usr/local/nixl
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
ENV PYTHONPATH=/usr/local/nixl/lib/python3/dist-packages/:/opt/nixl/test/python/:$PYTHONPATH

473
474
# Setup the python environment: uv from the upstream image, Python headers
# from apt, a 3.12 virtual environment, and venv activation for interactive
# bash sessions.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels.
# The wheel directory is bind-mounted from the wheel_builder stage instead of
# being copied in and deleted afterwards: files removed in a later layer still
# occupy space in the layer that added them. The requirement is quoted so the
# shell cannot treat `[vllm]` as a glob pattern.
RUN --mount=type=bind,from=wheel_builder,source=/workspace/dist,target=/tmp/wheelhouse \
    uv pip install "ai-dynamo[vllm]" --find-links /tmp/wheelhouse
485
486
487

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
# NOTE(review): no instruction in this stage copies anything into
# /opt/dynamo/bindings, so confirm that this library is actually present in
# the runtime image.
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner: lines starting with "# " are stripped from the message,
# and ~/.bashrc is extended to print the result.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples into /workspace/examples
COPY ./examples examples/

# Start bash by default; no default arguments.
ENTRYPOINT ["/usr/bin/bash"]
CMD []