"vscode:/vscode.git/clone" did not exist on "877218b735214d2b47dc2e31c4454b9a41a977dd"
Dockerfile.vllm 16.9 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.03-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
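
# (Illustrative) a complete aarch64 build invocation might look like this;
# the image tag is a placeholder:
#   docker build -f Dockerfile.vllm \
#     --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 \
#     -t dynamo-vllm:arm64 .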

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
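
# (Illustrative) "nixl" above is a named build context, so a build might supply
# it with BuildKit's --build-context flag, using the checkout's HEAD commit as
# the cache key:
#   docker build --build-context nixl=/path/to/nixl \
#     --build-arg NIXL_COMMIT=$(git -C /path/to/nixl rev-parse HEAD) ...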

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

USER root
ARG PYTHON_VERSION=3.12

RUN apt-get update -y && \
    apt-get install -y \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    # Install utilities
    nvtop \
    tmux \
    vim

WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt

### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
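
# (Illustrative) with both binaries on PATH, a quick in-container sanity check
# could start the brokers in the background:
#   nats-server -js &
#   etcd &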


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
        cd /opt/nixl && uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux/"; \
    else \
        cd /opt/nixl && uv pip install . ; \
    fi

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4"
ARG VLLM_MAX_JOBS=4
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
    if [ "$ARCH" = "arm64" ]; then \
        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
        uv pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 && \
        # Download vLLM source with version matching patch
        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
        # Patch vLLM source with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
        # platform detection issues on ARM install.
        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
        # Remove pytorch from vllm install dependencies
        python use_existing_torch.py && \
        # Build/install vllm from source
        uv pip install -r requirements/build.txt && \
        # MAX_JOBS is set to avoid running OOM on the vllm-flash-attn build; it can
        # significantly impact the overall build time. Each job can take up to
        # ~16GB of RAM, so tune according to available system memory.
        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install . --no-build-isolation ; \
    # Handle x86_64: Download wheel, unpack, setup for later steps
    else \
        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
        # Patch vLLM pre-built download with dynamo additions
        cd /tmp/vllm && \
        wheel unpack *.whl && \
        cd vllm-${VLLM_REF}/ && \
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # Rename the package from vllm to ai_dynamo_vllm
        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
        # Also update the tag in RECORD file to match
        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
        mkdir -p /workspace/dist && \
        wheel pack . --dest-dir /workspace/dist && \
        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
    fi
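
# (Illustrative) on the x86_64 path only the dist-info is renamed, so the
# import path remains `vllm`; a sanity check might be:
#   uv pip show ai_dynamo_vllm
#   python -c "import vllm; print(vllm.__version__)"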

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
    apt install --no-install-recommends -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
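# (Illustrative) sanity check for the toolchain install:
#   rustc --version && cargo --version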

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
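# (Illustrative) override at build time if 16 parallel jobs is too many (or
# too few) for the build host:
#   docker build --build-arg CARGO_BUILD_JOBS=8 ...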

#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container

# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000
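
# (Illustrative) to match the host user and keep bind-mounted files writable,
# a build might pass the host's IDs:
#   docker build --target local-dev \
#     --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) ...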

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
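# (Illustrative) e.g.:
#   docker build --target wheel_builder --build-arg RELEASE_BUILD=true ...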

WORKDIR /workspace

RUN yum update -y \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,python \
    -p dynamo-run \
    -p llmctl \
    # Multiple http named crates are present in dependencies, need to specify the path
    -p file://$PWD/components/http \
    -p metrics
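
# (Illustrative) the file:// form above is a Cargo package-ID spec; the exact
# spec for a local crate can be printed with:
#   cargo pkgid --manifest-path components/http/Cargo.toml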

# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi

#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY . /workspace

# Build rest of the crates
# TODO: Figure out Rust caching to avoid rebuilding these crates, then remove the exclude flags
RUN cargo build --release --locked --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.  && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so

##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/

########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
    rm -r wheelhouse

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []
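
# (Illustrative) running the resulting runtime image; flags and tag are
# placeholders:
#   docker run --rm -it --gpus all --network host dynamo-vllm:latest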