"test_audios/vscode:/vscode.git/clone" did not exist on "66b406ec3d5443ed0b322ebfac449f633a65aed2"
Dockerfile.vllm 17.9 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.03-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD

ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published PyPI tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
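#
# For reference, a complete invocation might look like the following; the
# Dockerfile path and image tag here are illustrative placeholders, not
# published names:
#   docker buildx build -f container/Dockerfile.vllm \
#       --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 \
#       -t dynamo-vllm:dev .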
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl
# Use the NIXL commit hash as a cache key: the RUN below (and every later
# layer) is only invalidated when NIXL_COMMIT changes.
ARG NIXL_COMMIT
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
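# NOTE: "nixl" is not a stage defined in this file, so --from=nixl resolves to
# a named build context that must be supplied at build time, alongside the
# NIXL_COMMIT arg (which should match that checkout so the cache key stays
# accurate). The path and SHA below are illustrative placeholders:
#   docker buildx build --build-context nixl=/path/to/nixl \
#       --build-arg NIXL_COMMIT=<sha> ...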
COPY --from=nixl . .

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

USER root
ARG PYTHON_VERSION=3.12

RUN apt-get update -y && \
    apt-get install -y \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    # Install utilities
    nvtop \
    tmux \
    vim \
    autoconf \
    libtool

WORKDIR /workspace

### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx
RUN rm -rf /usr/local/ucx
RUN cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx &&                   \
    git checkout v1.19.x &&     \
    ./autogen.sh && ./configure \
    --prefix=/usr/local/ucx     \
    --enable-shared             \
    --disable-static            \
    --disable-doxygen-doc       \
    --enable-optimizations      \
    --enable-cma                \
    --enable-devel-headers      \
    --with-cuda=/usr/local/cuda \
    --with-verbs                \
    --with-efa                  \
    --with-dm                   \
    --with-gdrcopy=/usr/local   \
    --enable-mt &&              \
    make -j &&                  \
    make -j install-strip &&    \
    ldconfig
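
# Optional sanity check (illustrative): confirm the freshly built UCX is usable.
#   /usr/local/ucx/bin/ucx_info -v    # prints UCX version/build configuration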

ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:/usr/local/cuda/compat/lib.real:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt

### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
        cd /opt/nixl && uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
    else \
        cd /opt/nixl && uv pip install . ; \
    fi

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post1"
ARG VLLM_MAX_JOBS=4
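# VLLM_MAX_JOBS only applies to the ARM source build below; raise it only if
# the build host has plenty of RAM, e.g. (illustrative):
#   --build-arg VLLM_MAX_JOBS=8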
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
    if [ "$ARCH" = "arm64" ]; then \
        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
        uv pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 && \
        # Download vLLM source with version matching patch
        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
        # Patch vLLM source with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
        # platform detection issues on ARM install.
        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
        # Remove pytorch from vllm install dependencies
        python use_existing_torch.py && \
        # Build/install vllm from source
        uv pip install -r requirements/build.txt && \
        # MAX_JOBS set to avoid running OOM on the vllm-flash-attn build; this can
        # significantly impact the overall build time. Each job can take up to
        # ~16GB RAM, so tune according to available system memory.
        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install . --no-build-isolation ; \
    # Handle x86_64: Download wheel, unpack, setup for later steps
    else \
        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
        # Patch vLLM pre-built download with dynamo additions
        cd /tmp/vllm && \
        wheel unpack *.whl && \
        cd vllm-${VLLM_REF}/ && \
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # Rename the package from vllm to ai_dynamo_vllm
        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
        # Also update the tag in RECORD file to match
        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
        mkdir -p /workspace/dist && \
        wheel pack . --dest-dir /workspace/dist && \
        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
    fi

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install: the first invocation downloads its runtime
# dependencies, so run it once here to bake them into the image
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
    apt install --no-install-recommends -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

ARG CARGO_BUILD_JOBS
# Default CARGO_BUILD_JOBS to 16 if not provided.
# This prevents cargo from building $(nproc) jobs in parallel, which might
# exceed the open-files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
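# Override at build time if needed, e.g. (illustrative):
#   docker build --build-arg CARGO_BUILD_JOBS=8 ...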

#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Use the default ubuntu user but grant it sudo access, so files written from
# inside the container don't end up owned by root.

# The ubuntu username itself is not configurable (it is hardcoded in
# .devcontainer); only the UID and GID can be changed.
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# Copying the venv with --chown is slow (~40s on my CPU), but far faster than
# a post-hoc chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my CPU)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Default CARGO_BUILD_JOBS to 16 if not provided.
# This prevents cargo from building $(nproc) jobs in parallel, which might
# exceed the open-files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
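# e.g. (illustrative): docker build --build-arg RELEASE_BUILD=true ...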

WORKDIR /workspace

RUN yum update -y \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,python \
    -p dynamo-run \
    -p llmctl \
    # Multiple http named crates are present in dependencies, need to specify the path
    -p file://$PWD/components/http \
    -p metrics

# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi
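
# At this point /workspace/dist holds the ai-dynamo wheel plus the
# ai-dynamo-runtime bindings wheels (one per Python version when
# RELEASE_BUILD=true); later stages copy them out of this stage.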

#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY . /workspace

# Build the rest of the crates
# TODO: Figure out Rust build caching to avoid rebuilds and drop these exclude flags
RUN cargo build --release --locked --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.  && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so

##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin &&  \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/

########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient
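
# Example benchmark run (illustrative; exact flags vary across genai-perf
# versions):
#   genai-perf profile -m <model> --url <endpoint>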

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels and symlink the executables into /usr/local/bin: Dynamo
# components currently do not have VIRTUAL_ENV on their PATH, so they rely on
# the symlinks.
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
    rm -r wheelhouse

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []