# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
# Image and tag used by the nixl_base, base and perf_analyzer stages.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

# Image and tag used by the runtime stage.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Commit of the perf_analyzer repository that is built and installed for benchmarking.
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"

# No default; stages that need it redeclare it.
ARG RELEASE_BUILD
15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

31
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Bring the global ARCH / ARCH_ALT build args into this stage's scope.
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl

# Cache hint: the commit id is written to a file, so this layer (and the
# source copy after it) only changes when NIXL_COMMIT changes.
ARG NIXL_COMMIT
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt

# Copy the nixl source from the source named "nixl". No stage of that name is
# defined in this file; presumably it is a named build context — TODO confirm.
COPY --from=nixl . .

45
##################################
46
########## Base Image ############
47
48
##################################

49
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

USER root
ARG PYTHON_VERSION=3.12

# System packages needed by the rest of this stage. The apt package index is
# removed in the same layer so it is not stored in the image; the later
# package-install steps in this file run their own index update first.
RUN apt-get update -y && \
    apt-get install -y \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    # Install utilities
    nvtop \
    tmux \
    vim \
    autoconf \
    libtool && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

### UCX EFA Setup ###
# Remove any UCX trees already present at these locations, then build the
# v1.19.x branch of UCX from source and install it under /usr/local/ucx.
RUN rm -rf /opt/hpcx/ucx /usr/local/ucx
RUN cd /usr/local/src && \
    # Only the tip of the branch is needed for the build, so skip the history.
    git clone --depth 1 --branch v1.19.x https://github.com/openucx/ucx.git && \
    cd ucx &&                   \
    ./autogen.sh && ./configure \
    --prefix=/usr/local/ucx     \
    --enable-shared             \
    --disable-static            \
    --disable-doxygen-doc       \
    --enable-optimizations      \
    --enable-cma                \
    --enable-devel-headers      \
    --with-cuda=/usr/local/cuda \
    --with-verbs                \
    --with-efa                  \
    --with-dm                   \
    --with-gdrcopy=/usr/local   \
    --enable-mt &&              \
    # Bound the job count to the number of CPUs: a bare `make -j` places no
    # limit on parallel jobs and can exhaust memory on the build host.
    make -j"$(nproc)" &&        \
    make -j"$(nproc)" install-strip && \
    ldconfig &&                 \
    # The source/build tree is not needed after installation; removing it in
    # the same layer keeps it out of the image.
    cd / && rm -rf /usr/local/src/ucx

Alec's avatar
Alec committed
103
# Put the system locations and the UCX install under /usr/local/ucx ahead of
# whatever the search paths already contain.
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH \
    CPATH=/usr/include:$CPATH \
    PATH=/usr/bin:$PATH \
    PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH

# Run the following shell-form instructions through bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

111
### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt

# Configure, build and install NIXL into /usr/local/nixl. Only the arm64 build
# passes an explicit gds_path (the sbsa-linux CUDA target directory).
RUN gds_opt=""; \
    if [ "$ARCH" = "arm64" ]; then \
        gds_opt="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
    fi; \
    cd /opt/nixl && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl ${gds_opt} && \
    cd build/ && \
    ninja && \
    ninja install
130

131
### NATS & ETCD SETUP ###
# nats: download the v2.10.24 .deb for the target architecture, install it,
# and delete the package file in the same layer.
RUN NATS_DEB="nats-server-v2.10.24-${ARCH}.deb" && \
    wget --tries=3 --waitretry=5 "https://github.com/nats-io/nats-server/releases/download/v2.10.24/${NATS_DEB}" && \
    dpkg -i "${NATS_DEB}" && \
    rm "${NATS_DEB}"

# etcd: unpack the release tarball into /usr/local/bin/etcd and put that
# directory on PATH.
ENV ETCD_VERSION="v3.5.18"
RUN mkdir -p /usr/local/bin/etcd && \
    wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###
145
146
147

# Install uv and create virtualenv
# NOTE(review): uv is copied from the floating `latest` tag, so the uv version
# can differ between builds — consider pinning a release.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# The interpreter version comes from the PYTHON_VERSION build arg declared
# earlier in this stage (default 3.12) instead of a second hard-coded copy.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

155
156
157
# Install NIXL Python module from the source tree in /opt/nixl.
# TODO: Move gds_path selection based on arch into NIXL build
RUN cd /opt/nixl && \
    if [ "$ARCH" = "arm64" ]; then \
        uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
    else \
        uv pip install . ; \
    fi

163
164
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post1"
ARG VLLM_MAX_JOBS=4
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
    if [ "$ARCH" = "arm64" ]; then \
        # arm64: build vLLM from patched source.
        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
        # NIXL has a torch dependency, so need to force-reinstall to install the correct version
        uv pip install torch==2.7.0 torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu128 && \
        # Download vLLM source with version matching patch
        vllm_src="/tmp/vllm/vllm-${VLLM_REF}" && \
        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git "${vllm_src}" && \
        cd "${vllm_src}/" && \
        # Patch vLLM source with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
        # platform detection issues on ARM install.
        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
        # Remove pytorch from vllm install dependencies
        python use_existing_torch.py && \
        # Build/install vllm from source
        uv pip install -r requirements/build.txt && \
        # MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
        # significantly impact the overall build time. Each job can take up
        # to ~16GB RAM each, so tune according to available system memory.
        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install . --no-build-isolation ; \
    else \
        # x86_64: download the pre-built wheel, patch it, repackage it under
        # the patched name/version, and install the result.
        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
        cd /tmp/vllm && \
        wheel unpack *.whl && \
        cd vllm-${VLLM_REF}/ && \
        # Patch vLLM pre-built download with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # Rename the package from vllm to ai_dynamo_vllm
        dist_info="${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info" && \
        mv vllm-${VLLM_REF}.dist-info "${dist_info}" && \
        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" "${dist_info}/METADATA" && \
        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" "${dist_info}/METADATA" && \
        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" "${dist_info}/WHEEL" && \
        # Also update the tag in RECORD file to match
        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" "${dist_info}/RECORD" && \
        mkdir -p /workspace/dist && \
        wheel pack . --dest-dir /workspace/dist && \
        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
    fi
216

217
218
219
220
# Common dependencies (requirements file is bind-mounted, not copied into the image)
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt
224

225
# ### MISC UTILITY SETUP ###

# Finish pyright install (run it once at build time, discarding all output)
RUN pyright --help >/dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Point /bin/sh at bash
RUN ln -sf /bin/bash /bin/sh

235
236
237
238
### BUILDS ###

# Rust build/dev dependencies
# `apt-get` is used rather than `apt`, whose command-line interface is not
# meant for scripted use. The package index is removed in the same layer so
# it is not stored in the image.
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config && \
    rm -rf /var/lib/apt/lists/*

# Locations of the rustup/cargo installs and the Rust toolchain version.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0
250

251
252
253
254
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Download rustup-init 1.28.1 for RUSTARCH, install the RUST_VERSION toolchain
# with the minimal profile, then make the rustup and cargo homes writable by
# all users.
# TODO: Add SHA check back based on RUSTARCH
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y \
        --no-modify-path \
        --profile minimal \
        --default-toolchain $RUST_VERSION \
        --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

# CARGO_BUILD_JOBS defaults to 16 when the build arg is not provided.
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container

# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

# Install sudo and gnupg, give $USERNAME passwordless sudo as root, make sure
# its home directory exists and is owned by it, and set bash as its login shell.
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 && \
    echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME && \
    chmod 0440 /etc/sudoers.d/$USERNAME && \
    mkdir -p /home/$USERNAME && \
    chown -R $USERNAME:$USERNAME /home/$USERNAME && \
    rm -rf /var/lib/apt/lists/* && \
    chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
# Non-recursive: only the venv directory entry itself is re-owned here.
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
299
# Search order: SDK sources, then any PYTHONPATH already set, then the planner
# sources. The existing value is inserted once (it was previously appended
# twice), and only when it is non-empty, so no empty path element is created.
ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src${PYTHONPATH:+:$PYTHONPATH}:$HOME/dynamo/components/planner/src
300
301
302
303
304
305
306
307
308
309
WORKDIR $HOME

# Persist bash history: append a snippet to ~/.bashrc that flushes history
# after every command and stores it under ~/.commandhistory.
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" && \
    mkdir -p $HOME/.commandhistory && \
    touch $HOME/.commandhistory/.bash_history && \
    echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

# In this stage the C API library path points at a debug build location under
# $HOME/dynamo.
ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
315
##### Wheel Build Image ##########
316
317
##################################

318
319
320
321
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
# NOTE(review): this ARG appears before the next FROM, so it belongs to the
# preceding stage; the FROM below presumably resolves ARCH_ALT from the global
# ARG declared before the first FROM — confirm whether this line is needed.
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
322

323
324
325
326
327
328
329
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

# CARGO_BUILD_JOBS defaults to 16 when the build arg is not provided.
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

WORKDIR /workspace

# Build prerequisites; the yum caches are cleared in the same layer.
RUN yum update -y && \
    yum install -y \
        llvm-toolset \
        python3.12-devel \
        protobuf-compiler && \
    yum clean all && \
    rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

# Reuse the Rust toolchain, NIXL install, workspace and virtualenv from the base stage.
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /usr/local/nixl /opt/nvidia/nvda_nixl
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
351

352
353
354
355
356
357
358
359
# Copy configuration files
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
366
367

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,python,dynamo-llm/block-manager \
    --package dynamo-run \
    --package llmctl \
    # Multiple http named crates are present in dependencies, need to specify the path
    --package file://$PWD/components/http \
    --package metrics

# Build dynamo wheel, then the Python bindings wheel for 3.12; when
# RELEASE_BUILD is "true", also build the bindings for 3.11 and 3.10.
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        for py in 3.11 3.10; do \
            uv build --wheel --out-dir /workspace/dist --python "$py" || exit 1; \
        done; \
    fi
383

384
385
386
#######################################
########## CI Minimum Image ###########
#######################################
387
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace \
    CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

# Artifacts produced by the wheel_builder stage: wheels, cargo target dir, NIXL install.
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY . /workspace

# Build rest of the crates (the excluded ones were built in wheel_builder)
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --features block-manager --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics
409
410
411
412
413
414

# Package the bindings: the cp312 wheel, the C API shared library and its
# headers go under /opt/dynamo/bindings; release binaries go to /usr/local/bin.
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.  && \
    cp target/release/dynamo-run \
       target/release/http \
       target/release/llmctl \
       target/release/metrics \
       target/release/mock_worker \
       /usr/local/bin

# Install the runtime wheel first, then the pure-Python wheel.
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner: drop the lines matched by the sed pattern, store the
# rest in ~/.launch_screen, and print it from ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

430
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
# Derive the NIXL library directory from ARCH_ALT (x86_64 by default) instead
# of hard-coding the x86_64 path, so arm64 builds do not point at a
# non-existent directory.
# NOTE(review): assumes the arm64 install lands in lib/aarch64-linux-gnu — confirm.
ARG ARCH_ALT
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/
433

434
435
436
437
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
# Fetch the sources at the commit given by GENAI_PERF_TAG.
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git && \
    git -C perf_analyzer checkout ${GENAI_PERF_TAG}
# Configure with the OpenAI client enabled and build with 8 jobs.
RUN mkdir perf_analyzer/build && \
    cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON && \
    cmake --build perf_analyzer/build -- -j8
# Collect the build output under /workspace/bin for later stages to copy.
RUN mkdir bin && \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
452

453
########################################
454
########## Development Image ###########
455
########################################
456
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

# Bring in the perf_analyzer build output and sources, and put the binaries on PATH.
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking, then remove the tritonclient package.
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf" && \
    uv pip uninstall tritonclient

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################
477
478
479
480

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace \
    VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Setup the python environment: install python3-dev, create the virtualenv
# with uv, and activate it from ~/.bashrc.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

493
494
# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
# The requirement is quoted so the shell passes `ai-dynamo[vllm]` through
# literally instead of treating `[vllm]` as a glob character class.
RUN uv pip install "ai-dynamo[vllm]" --find-links wheelhouse && \
    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
    rm -r wheelhouse
499
500
501

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
# NOTE(review): no instruction in this stage copies anything to
# /opt/dynamo/bindings — confirm the library is provided at this path.
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner: drop the lines matched by the sed pattern, store the
# rest in ~/.launch_screen, and print it from ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []