Dockerfile 22.9 KB
Newer Older
1
# syntax=docker/dockerfile:1.10.0
2
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
# SPDX-License-Identifier: Apache-2.0
4
5
6
7
8
9
10
11
12
13
14
15
#
# NOTE FOR dynamo_base AND wheel_builder STAGES:
#
# All changes to dynamo_base and wheel_builder stages should be replicated across
# Dockerfile and Dockerfile.<framework> images.:
#   - Dockerfile
#   - Dockerfile.vllm
#   - Dockerfile.sglang
#   - Dockerfile.trtllm
# This duplication was introduced purposely to quickly enable Docker layer caching and
# deduplication. Please ensure these stages stay in sync until the duplication can be
# addressed.
16

17
18
19
20
##################################
########## Build Arguments ########
##################################

21
22
# This section contains build arguments that are common and shared across various
# Dockerfile.<frameworks>, so they should NOT have a default. The source of truth is from build.sh.
23

24
25
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
26
ARG EPP_IMAGE="us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1"
27
28
29

ARG PYTHON_VERSION
ARG ENABLE_KVBM
30
ARG ENABLE_MEDIA_NIXL
31
ARG ENABLE_MEDIA_FFMPEG
32
ARG CARGO_BUILD_JOBS
33
34
35
36
37
38
39
40
41
42
43
44
45
46

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

47
48
49
50
51
52
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

# NIXL configuration
53
54
55
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
Joe Chandler's avatar
Joe Chandler committed
56
ARG NIXL_LIBFABRIC_REF
57

58
59
60
61
62
63
64
65
66
67
##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

ARG ARCH
ARG ARCH_ALT

USER root
68
69
70
WORKDIR /opt/dynamo

# Install uv package manager
71
72
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

73
74
75
76
77
# Install NATS server
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
    wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
78

79
80
81
82
83
84
85
# Install etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
86

87
# Rust Setup
88
89
90
91
# Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
92
    RUST_VERSION=1.90.0
93
94
95
96

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

97
# Install Rust
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME


##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

114
115
116
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
117
ARG CARGO_BUILD_JOBS
118
ARG ENABLE_MEDIA_FFMPEG
119

Richard Huo's avatar
Richard Huo committed
120
WORKDIR /workspace
121

122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Copy CUDA from base stage
COPY --from=base /usr/local/cuda /usr/local/cuda
COPY --from=base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf

# Set environment variables first so they can be used in COPY commands
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PATH=/usr/local/cargo/bin:$PATH

# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
136
# Install system dependencies
Richard Huo's avatar
Richard Huo committed
137
138
139
140
141
142
143
144
RUN yum groupinstall -y 'Development Tools' &&  \
    dnf install -y almalinux-release-synergy && \
    dnf config-manager --set-enabled powertools && \
    dnf install -y \
        # Build tools
        cmake \
        ninja-build \
        clang-devel \
145
146
147
148
        # Install GCC toolset 14 (CUDA compatible, max version 14)
        gcc-toolset-14-gcc \
        gcc-toolset-14-gcc-c++ \
        gcc-toolset-14-binutils \
Richard Huo's avatar
Richard Huo committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
        flex \
        wget \
        # Kernel module build dependencies
        dkms \
        # Protobuf support
        protobuf-compiler \
        # RDMA/InfiniBand support (required for UCX build with --with-verbs)
        libibverbs \
        libibverbs-devel \
        rdma-core \
        rdma-core-devel \
        libibumad \
        libibumad-devel \
        librdmacm-devel \
Joe Chandler's avatar
Joe Chandler committed
163
164
165
166
167
        numactl-devel \
        # Libfabric support
        hwloc \
        hwloc-devel

168
169
170
171
172
# Set GCC toolset 14 as the default compiler (CUDA requires GCC <= 14)
ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
    CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
    CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
173

174

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${ARCH_ALT}" in \
      x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
      aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
      *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc include/*; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version

# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc

Richard Huo's avatar
Richard Huo committed
193
194
195
196
197
198
ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility

# Create virtual environment for building wheels
199
ARG PYTHON_VERSION
Richard Huo's avatar
Richard Huo committed
200
201
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
202
    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
Richard Huo's avatar
Richard Huo committed
203

204
205
206
207
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF

Richard Huo's avatar
Richard Huo committed
208
209
210
211
212
213
214
215
# Build and install gdrcopy
RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm

216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Install SCCACHE if requested
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
        /tmp/use-sccache.sh install; \
    fi

# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}

230
# Build FFmpeg from source
milesial's avatar
milesial committed
231
# Do not delete the source tarball for legal reasons
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
ARG FFMPEG_VERSION=7.1
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export RUSTC_WRAPPER="sccache"; \
    fi && \
    dnf install -y pkg-config && \
    cd /tmp && \
    curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    cd ffmpeg-${FFMPEG_VERSION} && \
    ./configure \
        --prefix=/usr/local \
milesial's avatar
milesial committed
249
250
        --disable-gpl \
        --disable-nonfree \
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
        --disable-programs \
        --disable-doc \
        --disable-static \
        --disable-x86asm \
        --disable-postproc \
        --disable-network \
        --disable-encoders \
        --disable-muxers \
        --disable-bsfs \
        --disable-devices \
        --disable-libdrm \
        --enable-shared && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "FFMPEG" && \
    ldconfig && \
milesial's avatar
milesial committed
267
268
    mkdir -p /usr/local/src/ffmpeg && \
    mv /tmp/ffmpeg-${FFMPEG_VERSION}* /usr/local/src/ffmpeg/; \
269
270
fi

Richard Huo's avatar
Richard Huo committed
271
272
273
274
# Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
275
276
277
278
279
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
Richard Huo's avatar
Richard Huo committed
280
281
282
283
    cd /usr/local/src && \
     git clone https://github.com/openucx/ucx.git && \
     cd ucx && 			     \
     git checkout $NIXL_UCX_REF &&	 \
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
     ./autogen.sh &&      \
     ./contrib/configure-release    \
        --prefix=/usr/local/ucx     \
        --enable-shared             \
        --disable-static            \
        --disable-doxygen-doc       \
        --enable-optimizations      \
        --enable-cma                \
        --enable-devel-headers      \
        --with-cuda=/usr/local/cuda \
        --with-verbs                \
        --with-dm                   \
        --with-gdrcopy=/usr/local   \
        --with-efa                  \
        --enable-mt &&              \
Richard Huo's avatar
Richard Huo committed
299
300
301
302
303
304
305
     make -j &&                      \
     make -j install-strip &&        \
     /tmp/use-sccache.sh show-stats "UCX" && \
     echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig

Joe Chandler's avatar
Joe Chandler committed
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
ARG NIXL_LIBFABRIC_REF
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/ofiwg/libfabric.git && \
    cd libfabric && \
    git checkout $NIXL_LIBFABRIC_REF && \
    ./autogen.sh && \
    ./configure --prefix="/usr/local/libfabric" \
                --disable-verbs \
                --disable-psm3 \
                --disable-opx \
                --disable-usnic \
                --disable-rstream \
                --enable-efa \
                --with-cuda=/usr/local/cuda \
                --enable-cuda-dlopen \
                --with-gdrcopy \
                --enable-gdrcopy-dlopen && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "LIBFABRIC" && \
    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
    ldconfig

Richard Huo's avatar
Richard Huo committed
337
338
339
340
# build and install nixl
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
341
342
343
344
345
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
Richard Huo's avatar
Richard Huo committed
346
    source ${VIRTUAL_ENV}/bin/activate && \
347
    git clone "https://github.com/ai-dynamo/nixl.git" && \
Richard Huo's avatar
Richard Huo committed
348
    cd nixl && \
349
350
351
352
353
354
355
356
    git checkout ${NIXL_REF} && \
    CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
    if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
        echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
        exit 1; \
    fi && \
    PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
Richard Huo's avatar
Richard Huo committed
357
358
359
360
    mkdir build && \
    meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
    -Dcudapath_lib="/usr/local/cuda/lib64" \
    -Dcudapath_inc="/usr/local/cuda/include" \
Joe Chandler's avatar
Joe Chandler committed
361
362
    -Ducx_path="/usr/local/ucx" \
    -Dlibfabric_path="/usr/local/libfabric" && \
Richard Huo's avatar
Richard Huo committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"

ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64  \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
380
381
382
383
384
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
Richard Huo's avatar
Richard Huo committed
385
    cd /workspace/nixl && \
386
    uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
Richard Huo's avatar
Richard Huo committed
387

388
389
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
Richard Huo's avatar
Richard Huo committed
390
COPY launch/ /opt/dynamo/launch/
391
392
393
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

Richard Huo's avatar
Richard Huo committed
394
# Build dynamo wheels
395
ARG ENABLE_KVBM
396
397
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
398
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
399
400
401
402
403
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export RUSTC_WRAPPER="sccache"; \
    fi && \
Richard Huo's avatar
Richard Huo committed
404
405
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
406
    uv build --wheel --out-dir /opt/dynamo/dist && \
407
    cd /opt/dynamo/lib/bindings/python && \
408
    FEATURES=""; \
409
    if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
410
411
412
413
414
415
416
        FEATURES="$FEATURES dynamo-llm/media-nixl"; \
    fi; \
    if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
        FEATURES="$FEATURES media-ffmpeg"; \
    fi; \
    if [ -n "$FEATURES" ]; then \
        maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
417
418
419
    else \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
420
    if [ "$ENABLE_KVBM" = "true" ]; then \
421
        cd /opt/dynamo/lib/bindings/kvbm && \
422
423
424
425
426
        maturin build --release --out target/wheels && \
        auditwheel repair \
            --exclude libnixl.so \
            --exclude libnixl_build.so \
            --exclude libnixl_common.so \
427
            --exclude 'lib*.so*' \
428
429
430
            --plat manylinux_2_28_${ARCH_ALT} \
            --wheel-dir /opt/dynamo/dist \
            target/wheels/*.whl; \
431
    fi && \
432
    /tmp/use-sccache.sh show-stats "Dynamo"
433
434

##############################################
435
########## Runtime image ##############
436
##############################################
437

438
FROM base AS runtime
439

Richard Huo's avatar
Richard Huo committed
440
ARG ARCH_ALT
441
ARG PYTHON_VERSION
Richard Huo's avatar
Richard Huo committed
442

443
444
445
446
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
447
448
449
450
451
452
453
    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
454
455
456
457
458

# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
459
    CARGO_TARGET_DIR=/opt/dynamo/target
460

461
462
463
464
465
466
467
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

468
469
470
471
472
# Copy ffmpeg
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
    cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
    cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
milesial's avatar
milesial committed
473
    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/; \
474
475
    true # in case ffmpeg not enabled

476
477
478
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
479

480
481
482
483
484
485
486
487
488
489
# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
# This is needed to create venv and install dynamo packages
ARG PYTHON_VERSION
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
490

491
# Switch to dynamo user and create virtual environment
492
USER dynamo
493
ENV HOME=/home/dynamo
Richard Huo's avatar
Richard Huo committed
494
495

# Create and activate virtual environment
496
497
498
# Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
RUN uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
Richard Huo's avatar
Richard Huo committed
499
500
501
502

ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

503
# Install dynamo wheels (runtime packages only, no test dependencies)
504
ARG ENABLE_KVBM
505
RUN uv pip install \
506
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
507
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
Richard Huo's avatar
Richard Huo committed
508
509
    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
510
511
512
513
514
515
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -z "$KVBM_WHEEL" ]; then \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
        uv pip install "$KVBM_WHEEL"; \
516
    fi
517

518
519
520
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

521
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
522
CMD []
523
524
525
526
527
528
529
530
531
532
533
534
535
536

##############################################
########## Frontend entrypoint image #########
##############################################
FROM ${EPP_IMAGE} AS epp

FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS frontend

ARG PYTHON_VERSION
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        # required for EPP
        ca-certificates \
        libstdc++6 \
537
538
        # required for verification of GPG keys
        gnupg2 \
539
540
541
542
543
        # required for installing dependencies from git repositories
        git \
        git-lfs \
        # Python runtime - required for virtual environment to work
        python${PYTHON_VERSION}-dev \
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*


# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
    && mkdir -p /home/dynamo/.cache /opt/dynamo /workspace \
    && chown -R dynamo: /opt/dynamo /home/dynamo/.cache /workspace \
    && chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace

# Set HOME so ModelExpress can find the cache directory
ENV HOME=/home/dynamo
# Switch to dynamo user
USER dynamo
ENV DYNAMO_HOME=/opt/dynamo

WORKDIR /
COPY --chown=dynamo: --from=epp /epp /epp

COPY --chown=dynamo: container/launch_message/frontend.txt /opt/dynamo/.launch_screen
# Copy tests, benchmarks, deploy and components with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: recipes/ /workspace/recipes/
# Copy attribution files with correct ownership
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/

ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="/opt/dynamo/venv/bin:$PATH"
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609

# Copy uv and wheelhouse from runtime stage
COPY --chown=dynamo: --from=runtime /bin/uv /bin/uvx /bin/
COPY --chown=dynamo: --from=runtime /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/

# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    UV_GIT_LFS=1 uv pip install \
        --no-cache \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

ARG ENABLE_KVBM
RUN uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -z "$KVBM_WHEEL" ]; then \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
        uv pip install "$KVBM_WHEEL"; \
    fi && \
    cd /workspace/benchmarks && \
    UV_GIT_LFS=1 uv pip install --no-cache .
610
611
612
613
614
615
616
617
618
619

# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc

USER dynamo

ENTRYPOINT ["/epp"]
620
CMD ["/bin/bash"]