# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.03-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to derive one of these values from the other
# without adding if statements everywhere, so both are defined as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
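
# A hypothetical end-to-end invocation for an arm64 build (image tag is
# illustrative, not the project's official command; additional build args and
# contexts may also be required):
#
#   docker build -f Dockerfile.vllm \
#     --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 \
#     -t dynamo-vllm:arm64 .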

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl
# Use the NIXL commit hash as a cache key: the layer below is invalidated
# only when NIXL_COMMIT changes.
ARG NIXL_COMMIT
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
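
# A sketch of how this stage is typically driven (paths are illustrative):
# the nixl source arrives via a named build context, and NIXL_COMMIT doubles
# as the cache key written above.
#
#   docker build --build-context nixl=/path/to/nixl \
#     --build-arg NIXL_COMMIT=$(git -C /path/to/nixl rev-parse HEAD) .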

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

USER root
ARG PYTHON_VERSION=3.12

RUN apt-get update -y && \
    apt-get install -y \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    # Install utilities
    nvtop \
    tmux \
    vim \
    autoconf \
    libtool

WORKDIR /workspace

### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx
RUN rm -rf /usr/local/ucx
RUN cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx &&                   \
    git checkout v1.19.x &&     \
    ./autogen.sh && ./configure \
    --prefix=/usr/local/ucx     \
    --enable-shared             \
    --disable-static            \
    --disable-doxygen-doc       \
    --enable-optimizations      \
    --enable-cma                \
    --enable-devel-headers      \
    --with-cuda=/usr/local/cuda \
    --with-verbs                \
    --with-efa                  \
    --with-dm                   \
    --with-gdrcopy=/usr/local   \
    --enable-mt &&              \
    make -j &&                  \
    make -j install-strip &&    \
    ldconfig
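
# Optional sanity check (a commented-out sketch, not required for the build):
# ucx_info ships with UCX and reports the version and build configuration.
# RUN /usr/local/ucx/bin/ucx_info -v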

ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:/usr/local/cuda/compat/lib.real:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl && \
    cd build/ && \
    ninja && \
    ninja install

### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
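
# For local experiments inside the container, both services can be started
# ad hoc (a sketch, not part of the image build; ports are upstream defaults):
#
#   nats-server -js &    # NATS with JetStream, listens on 4222
#   etcd &               # etcd client API on 2379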


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
        cd /opt/nixl && uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
    else \
        cd /opt/nixl && uv pip install . ; \
    fi

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post1"
ARG VLLM_MAX_JOBS=4
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    # NOTE: vLLM build from source on ARM can take several hours; see the VLLM_MAX_JOBS note below.
    if [ "$ARCH" = "arm64" ]; then \
        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
        uv pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 && \
        # Download vLLM source with version matching patch
        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
        # Patch vLLM source with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
        # platform detection issues on ARM install.
        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
        # Remove pytorch from vllm install dependencies
        python use_existing_torch.py && \
        # Build/install vllm from source
        uv pip install -r requirements/build.txt && \
        # MAX_JOBS is set to avoid running OOM on the vllm-flash-attn build and
        # can significantly impact overall build time. Each job can take up to
        # ~16GB of RAM, so tune according to available system memory.
        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install . --no-build-isolation ; \
    # Handle x86_64: Download wheel, unpack, setup for later steps
    else \
        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
        # Patch vLLM pre-built download with dynamo additions
        cd /tmp/vllm && \
        wheel unpack *.whl && \
        cd vllm-${VLLM_REF}/ && \
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # Rename the package from vllm to ai_dynamo_vllm
        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
        # Also update the tag in RECORD file to match
        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
        mkdir -p /workspace/dist && \
        wheel pack . --dest-dir /workspace/dist && \
        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
    fi
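
# A hedged smoke test (commented out): the dist-info rename above changes only
# packaging metadata, so the import name remains "vllm".
# RUN python -c "import vllm; print(vllm.__version__)"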

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Run pyright once so it finishes its install (the first invocation downloads
# its Node-based backend)
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
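
# Shape of the SHA check mentioned in the TODO above (a sketch; the per-arch
# RUSTUP_SHA256 values would need to be supplied, and the check run before
# rustup-init executes):
# RUN echo "${RUSTUP_SHA256} *rustup-init" | sha256sum -c -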

ARG CARGO_BUILD_JOBS
# Default CARGO_BUILD_JOBS to 16 if not provided.
# This prevents cargo from running $(nproc) jobs in parallel,
# which might exceed the open-files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
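
# Example override for hosts with fewer cores or tighter ulimits (the value
# shown is illustrative):
#   docker build --build-arg CARGO_BUILD_JOBS=8 ...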

#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Use the default ubuntu user, but give it sudo access.
# Needed so file permissions aren't set to root ownership when writing from inside the container.

# Keep the username ubuntu as-is and only change its uid/gid; the name is hardcoded in .devcontainer.
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Default CARGO_BUILD_JOBS to 16 if not provided.
# This prevents cargo from running $(nproc) jobs in parallel,
# which might exceed the open-files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

WORKDIR /workspace

RUN yum update -y \
    && yum install -y llvm-toolset \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /usr/local/nixl /opt/nvidia/nvda_nixl
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,python,dynamo-llm/block-manager \
    -p dynamo-run \
    -p llmctl \
    # Multiple http named crates are present in dependencies, need to specify the path
    -p file://$PWD/components/http \
    -p metrics

# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi
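
# Example invocation targeting this stage with multi-Python wheels enabled
# (an illustrative command, not the project's official one):
#   docker build --target wheel_builder --build-arg RELEASE_BUILD=true .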

#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY . /workspace

# Build the rest of the crates
# TODO: Figure out Rust caching to avoid rebuilds, then remove the exclude flags
RUN cargo build --release --locked --features block-manager --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.  && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/

##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/

########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient
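
# A hedged usage sketch (flags vary across genai-perf versions; the model name
# and URL below are placeholders):
#   genai-perf profile -m my-model --url http://localhost:8000 --endpoint-type chat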

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Set up the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels and symlink their executables into /usr/local/bin.
# Dynamo components don't have the VIRTUAL_ENV on their PATH, so the symlinks keep the executables reachable.
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
    rm -r wheelhouse
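
# Illustrative run of the finished image (the name, tag, and flags are
# placeholders, not the project's official command):
#   docker run --rm -it --gpus all --network host dynamo-runtime:latest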

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []