"vllm/tool_parsers/jamba_tool_parser.py" did not exist on "d6953beb91da4e9c99be4c0a1304a2d24189535c"
wheel_builder.Dockerfile 23.8 KB
Newer Older
1
2
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
# SPDX-License-Identifier: Apache-2.0
4
#}
5
# === BEGIN templates/wheel_builder.Dockerfile ===
6
7
8
9
##################################
##### Wheel Build Image ##########
##################################

10
11
12
13
14
15
16
17
18
{% if platform == "multi" and device == "cuda" %}
# Multi-arch CUDA builds: declare one named stage per architecture, each with an
# explicit --platform so the manylinux image is always pulled as the correct native
# arch regardless of the current TARGETPLATFORM. The wheel_builder_base stage then
# selects one of them with `FROM manylinux_${TARGETARCH}`.
# BuildKit only fetches and builds the stage that TARGETARCH resolves to; the other
# is a no-op for each sub-build.
FROM --platform=linux/amd64 quay.io/pypa/manylinux_2_28_x86_64 AS manylinux_amd64
FROM --platform=linux/arm64 quay.io/pypa/manylinux_2_28_aarch64 AS manylinux_arm64
{% endif %}

19
20
21
22
23
##################################
##### wheel_builder_base #########
##################################
# Shared base for all wheel builds: tools, system deps, and native libraries (except nixl).

24
25
26
{% if platform == "multi" and device == "cuda" %}
FROM manylinux_${TARGETARCH} AS wheel_builder_base
{% else %}
FROM ${WHEEL_BUILDER_IMAGE} AS wheel_builder_base
{% endif %}

# Build arguments consumed by the instructions of this stage.
ARG TARGETARCH
ARG CARGO_BUILD_JOBS
ARG DEVICE

# All relative paths in the RUN steps below resolve against /workspace.
WORKDIR /workspace
36
37
38
39
40
41
42
43
44
45
46
{% if device == "xpu" or device == "cpu" %}
# Base utilities and shared libraries for the apt-based (xpu/cpu) builder image.
# apt-get is used throughout (the `apt` front-end is intended for interactive use),
# and the package index is removed in the same layer so it is not stored in the
# image; the later apt steps visible in this template refresh the index themselves.
RUN apt-get clean && apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \
        ca-certificates \
        curl \
        git \
        libaio-dev \
        libgl1 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        linux-libc-dev \
        lsb-release \
        numactl \
        unzip \
        vim \
        wget \
        zip \
    && rm -rf /var/lib/apt/lists/*
{% endif %}
47

48
{% if device == "cuda" %}
# Take the CUDA toolkit tree and the hpcx.conf linker search-path file from dynamo_base.
COPY --from=dynamo_base /usr/local/cuda /usr/local/cuda
COPY --from=dynamo_base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
{% endif %}

# Rust toolchain locations and cargo settings. These are declared before the
# COPY instructions below because those expand RUSTUP_HOME and CARGO_HOME.
# CARGO_BUILD_JOBS falls back to 16 when the build arg is empty or unset.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PATH=/usr/local/cargo/bin:${PATH}

# Reuse the rustup and cargo directories already present in dynamo_base.
COPY --from=dynamo_base ${RUSTUP_HOME} ${RUSTUP_HOME}
COPY --from=dynamo_base ${CARGO_HOME} ${CARGO_HOME}
66

67
68
69
70
71
{% if device == "xpu" %}
# Register the Intel oneAPI apt repository and the Intel graphics PPA.
# The signing key is downloaded to a file and then de-armored via redirection
# instead of a `wget | gpg | tee` pipeline: this RUN comes before the SHELL
# instruction that enables pipefail in this stage, so in a pipeline a failed
# download would be hidden behind the exit status of the last command.
RUN wget --tries=3 --waitretry=5 -O /tmp/intel-sw-products.pub \
        https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
    gpg --dearmor < /tmp/intel-sw-products.pub > /usr/share/keyrings/oneapi-archive-keyring.gpg && \
    rm -f /tmp/intel-sw-products.pub && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \
    add-apt-repository -y ppa:kobuk-team/intel-graphics

# Fetch the UCX patch to /tmp/ucx.patch; the UCX build step applies it when DEVICE is xpu.
# The URL is pinned to a specific commit of intel/llm-scaler.
RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch

# Install Intel GPU runtime packages (apt-get rather than apt, whose CLI is not
# meant for scripted use); the apt cache and index are dropped in the same layer.
RUN apt-get update -y && apt-get upgrade -y && \
    apt-get install -y \
        intel-ocloc \
        intel-oneapi-compiler-dpcpp-cpp-2025.3 \
        intel-opencl-icd \
        libze-dev \
        libze-intel-gpu-raytracing \
        libze-intel-gpu1 \
        libze1 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
{% endif %}
81

82
83
{% if device == "xpu" or device == "cpu" %}
# From here on RUN steps execute under bash with pipefail, so a failing command
# anywhere in a pipeline fails the step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Compilers and build tooling; the apt cache and index are removed in the same layer.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler \
        # NIXL build dependencies
        autoconf \
        automake \
        cmake \
        git-lfs \
        libtool \
        meson \
        net-tools \
        ninja-build \
        pybind11-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# RDMA / InfiniBand userspace libraries and headers. --reinstall makes apt install
# these packages again even when they are already present in the base image.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall --no-install-recommends \
        ibverbs-providers \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        rdma-core \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
{% endif %}

{% if device == "cuda" %}
# System build dependencies for the dnf-based CUDA builder image.
# /var/cache/dnf is a BuildKit cache mount; sharing=locked keeps concurrent
# builds from racing on the dnf/rpm cache.
RUN --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    dnf install -y almalinux-release-synergy && \
    dnf config-manager --set-enabled powertools && \
    dnf install -y \
        # Autotools (UCX and libfabric run ./autogen.sh and ./configure)
        autoconf \
        automake \
        libtool \
        make \
        # RPM tooling (gdrcopy is built through build-rpm-packages.sh)
        rpm-build \
        rpm-sign \
        # General build tools
        clang-devel \
        cmake \
        flex \
        ninja-build \
        wget \
        # GCC toolset 14 (CUDA compatible, max version 14)
        gcc-toolset-14-binutils \
        gcc-toolset-14-gcc \
        gcc-toolset-14-gcc-c++ \
        # Kernel module build dependencies
        dkms \
        # Protobuf support
        protobuf-compiler \
        # RDMA/InfiniBand support (UCX is configured with --with-verbs)
        libibumad \
        libibumad-devel \
        libibverbs \
        libibverbs-devel \
        librdmacm-devel \
        numactl-devel \
        rdma-core \
        rdma-core-devel \
        # Libfabric support
        libcurl-devel \
        libuuid-devel \
        openssl-devel \
        zlib-devel
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Build hwloc >= 2.3 from source (RHEL8 ships 2.2 which lacks hwloc_location API
# required by nixl v1.0.x libfabric topology code).
# HWLOC_SERIES is the "major.minor" prefix of HWLOC_VERSION, which the download
# URL uses as its directory name. curl runs with --fail so an HTTP error aborts
# the step immediately instead of saving an error page as the tarball.
ARG HWLOC_VERSION=2.12.0
RUN HWLOC_SERIES="$(echo "${HWLOC_VERSION}" | cut -d. -f1-2)" && \
    cd /tmp && \
    curl --fail --retry 3 -LO "https://download.open-mpi.org/release/hwloc/v${HWLOC_SERIES}/hwloc-${HWLOC_VERSION}.tar.gz" && \
    tar xf "hwloc-${HWLOC_VERSION}.tar.gz" && \
    cd "hwloc-${HWLOC_VERSION}" && \
    ./configure --prefix=/usr/local && \
    make -j"$(nproc)" && \
    make install && \
    ldconfig && \
    # Leave the source tree before deleting it (removes both tarball and directory).
    cd / && \
    rm -rf /tmp/hwloc-*

# Set GCC toolset 14 as the default compiler (CUDA requires GCC <= 14)
ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
    CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
    CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
179
{% endif %}
180

181
182
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional).
# The release asset is chosen directly from TARGETARCH so that an unexpected
# architecture fails with a clear message instead of silently falling back to
# the aarch64 build. Any protoc already at /usr/local/bin or /usr/bin is
# replaced, and /usr/bin/protoc becomes a symlink to the new binary.
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${TARGETARCH:-}" in \
      amd64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
      arm64|aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
      *) echo "Unsupported architecture: ${TARGETARCH:-<unset>}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    # The archive member pattern is quoted so unzip, not the shell, expands it.
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc 'include/*'; \
    # Remove the downloaded archive in the same layer that created it.
    rm -f /tmp/protoc.zip; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version

# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc

200
{% if device == "xpu" or device == "cpu" %}
# Install uv package manager
# NOTE(review): the :latest tag makes this layer non-reproducible; consider pinning a uv release.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-}
{% else %}
ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-} \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}

# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
# Cache uv downloads; uv handles its own locking for this cache.
# The venv path and Python version are quoted, and 'maturin[patchelf]' is quoted
# because the square brackets would otherwise be treated as a shell glob pattern.
RUN --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv venv "${VIRTUAL_ENV}" --python "${PYTHON_VERSION}" && \
    uv pip install --upgrade meson pybind11 patchelf 'maturin[patchelf]' tomlkit
219
220

# Git ref of UCX checked out by the UCX build step further down.
ARG NIXL_UCX_REF

{% if device == "cuda" %}
# Git branch or tag of gdrcopy to build.
ARG NIXL_GDRCOPY_REF

# Build the gdrcopy RPMs from source and install the kmod, library and devel packages.
# ARCH_ALT is the RPM architecture name: x86_64 for amd64, aarch64 for anything else.
RUN if [ "${TARGETARCH}" = "amd64" ]; then ARCH_ALT="x86_64"; else ARCH_ALT="aarch64"; fi && \
    git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
{% endif %}
234

235
236
237
238
239
# dynamo_base ships a pre-installed sccache binary. It is staged outside PATH
# because Meson would otherwise auto-detect it as a CUDA compiler launcher
# (https://github.com/mesonbuild/meson/issues/11118).
# The RUN below symlinks it onto PATH only when USE_SCCACHE=true.
COPY --from=dynamo_base /usr/local/bin/sccache /opt/sccache/sccache

ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Nothing happens unless USE_SCCACHE is exactly "true".
RUN [ "$USE_SCCACHE" != "true" ] || { \
        ln -s /opt/sccache/sccache /usr/local/bin/sccache && \
        /tmp/use-sccache.sh install; \
    }

# SCCACHE_BUCKET / SCCACHE_REGION take the build-arg values only when USE_SCCACHE
# is set to a non-empty value; otherwise they are empty. RUSTC_WRAPPER is set
# dynamically by setup-env only when the sccache server starts successfully.
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
254

255
# Always build FFmpeg so libs are available for Rust checks in CI
# Do not delete the source tarball for legal reasons
#
# Steps: optionally enable sccache, install the build prerequisites for the
# current DEVICE, download and build FFmpeg as shared libraries under /usr/local,
# then move the tarball and the source tree to /usr/local/src/ffmpeg.
# The apt commands are chained with && so a failed install aborts the step
# (separated by ';' the failure would be discarded), and curl uses --fail so an
# HTTP error is not saved as the tarball.
ARG FFMPEG_VERSION
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
        apt-get update -y && \
        apt-get install -y build-essential pkg-config xz-utils && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*; \
    elif [ "$DEVICE" = "cuda" ]; then \
        dnf install -y pkg-config xz; \
    fi && \
    cd /tmp && \
    curl --fail --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    cd ffmpeg-${FFMPEG_VERSION} && \
    ./configure \
        --prefix=/usr/local \
        --disable-gpl \
        --disable-nonfree \
        --disable-programs \
        --disable-doc \
        --disable-static \
        --disable-x86asm \
        --disable-postproc \
        --disable-network \
        --disable-encoders \
        --disable-muxers \
        --disable-bsfs \
        --disable-devices \
        --disable-libdrm \
        --enable-shared && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "FFMPEG" && \
    ldconfig && \
    mkdir -p /usr/local/src/ffmpeg && \
    mv /tmp/ffmpeg-${FFMPEG_VERSION}* /usr/local/src/ffmpeg/
296

297
298
299
# Build and install UCX at NIXL_UCX_REF under /usr/local/ucx.
#
# The configure flags shared by every device are passed once; only the
# device-specific flags differ and are selected by the case statement:
#   xpu  - Level Zero (--with-ze), device memory, EFA, no CUDA (plus /tmp/ucx.patch)
#   cuda - CUDA, device memory, gdrcopy, EFA
#   cpu  - no CUDA
# An unrecognised DEVICE now fails immediately instead of skipping configure.
# make is bounded to $(nproc) jobs; a bare `make -j` places no limit on the
# number of parallel jobs.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    case "$DEVICE" in \
        xpu)  UCX_DEVICE_FLAGS="--with-ze --with-dm --with-efa --without-cuda" ;; \
        cuda) UCX_DEVICE_FLAGS="--with-cuda=/usr/local/cuda --with-dm --with-gdrcopy=/usr/local --with-efa" ;; \
        cpu)  UCX_DEVICE_FLAGS="--without-cuda" ;; \
        *) echo "Unsupported DEVICE for UCX build: ${DEVICE}" >&2; exit 1 ;; \
    esac && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    if [ "$DEVICE" = "xpu" ]; then \
        git apply --ignore-whitespace /tmp/ucx.patch; \
    fi && \
    ./autogen.sh && \
    # UCX_DEVICE_FLAGS is intentionally unquoted so it splits into separate arguments.
    ./contrib/configure-release \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-verbs \
        ${UCX_DEVICE_FLAGS} \
        --enable-mt && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    /tmp/use-sccache.sh show-stats "UCX" && \
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig

362
{% if device == "cuda" %}
# Build libfabric at NIXL_LIBFABRIC_REF, install it under /usr/local/libfabric
# and register its lib directory with the dynamic linker.
ARG NIXL_LIBFABRIC_REF
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    { [ "$USE_SCCACHE" != "true" ] || eval $(/tmp/use-sccache.sh setup-env); } && \
    cd /usr/local/src && \
    git clone https://github.com/ofiwg/libfabric.git && \
    cd libfabric && \
    git checkout $NIXL_LIBFABRIC_REF && \
    ./autogen.sh && \
    ./configure \
        --prefix="/usr/local/libfabric" \
        # Enabled: EFA provider, CUDA and gdrcopy (both loaded via dlopen)
        --enable-efa \
        --with-cuda=/usr/local/cuda \
        --enable-cuda-dlopen \
        --with-gdrcopy \
        --enable-gdrcopy-dlopen \
        # Disabled providers
        --disable-verbs \
        --disable-psm3 \
        --disable-opx \
        --disable-usnic \
        --disable-rstream && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "LIBFABRIC" && \
    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
    ldconfig
{% endif %}
Joe Chandler's avatar
Joe Chandler committed
392

393
{% if framework == "vllm" and device == "cuda" %}
# AWS SDK C++ (S3 component only), required for the NIXL OBJ backend / S3 support.
# Built as shared libraries into /usr/local; the checkout is removed afterwards.
ARG AWS_SDK_CPP_VERSION=1.11.760
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    { [ "$USE_SCCACHE" != "true" ] || eval $(/tmp/use-sccache.sh setup-env cmake); } && \
    git clone --recurse-submodules --depth 1 --branch ${AWS_SDK_CPP_VERSION} \
        https://github.com/aws/aws-sdk-cpp.git /tmp/aws-sdk-cpp && \
    mkdir -p /tmp/aws-sdk-cpp/build && \
    cd /tmp/aws-sdk-cpp/build && \
    cmake .. \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=ON \
        -DBUILD_ONLY="s3" \
        -DENABLE_TESTING=OFF && \
    make -j$(nproc) && \
    make install && \
    cd / && \
    rm -rf /tmp/aws-sdk-cpp && \
    ldconfig && \
    /tmp/use-sccache.sh show-stats "AWS SDK C++"
{% endif %}

420
421
422
423
424
425
426
427
428
429

##################################
##### runtime_wheel_builder ######
##################################
# Builds ai-dynamo, ai-dynamo-runtime, and gpu_memory_service wheels, sans nixl.

FROM wheel_builder_base AS runtime_wheel_builder

{% if target not in ("dev", "local-dev") %}
# Copy source code (order matters for layer caching)
COPY .cargo/ /opt/dynamo/.cargo/
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build the ai-dynamo wheel (uv build) and the ai-dynamo-runtime wheel (maturin)
# into /opt/dynamo/dist. The maturin feature list always contains kv-indexer;
# media-ffmpeg is added when ENABLE_MEDIA_FFMPEG=true.
ARG USE_SCCACHE
ARG ENABLE_MEDIA_FFMPEG
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cargo/registry \
    --mount=type=cache,target=/root/.cargo/git \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    { [ "$USE_SCCACHE" != "true" ] || eval $(/tmp/use-sccache.sh setup-env cmake); } && \
    mkdir -p ${CARGO_TARGET_DIR} && \
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
    uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    MATURIN_FEATURES="kv-indexer" && \
    if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
        MATURIN_FEATURES="media-ffmpeg,kv-indexer"; \
    fi && \
    maturin build --release --features "${MATURIN_FEATURES}" --out /opt/dynamo/dist && \
    /tmp/use-sccache.sh show-stats "Dynamo Runtime"

{% else %}
# Dev/local-dev targets do not have pre-built wheels or /workspace source code.
# After you start the local-dev/dev container, you will need to build from source:
#   cargo build --features dynamo-llm/block-manager
#   cd /workspace/lib/bindings/python && maturin develop --uv && cd /workspace
#   uv pip install --no-deps -e /workspace
# See container/launch_message/dev.txt for the full setup steps.

# Create the dist dir with a placeholder so that a downstream
# COPY --from=wheel_builder /opt/dynamo/dist/*.whl always has a match.
RUN mkdir -p /opt/dynamo/dist ${CARGO_TARGET_DIR} && \
    touch /opt/dynamo/dist/.placeholder.whl

# Dev/local-dev skip the full COPY lib/ above, so copy the gpu_memory_service
# source explicitly for the wheel build below.
COPY lib/gpu_memory_service/ /opt/dynamo/lib/gpu_memory_service/
{% endif %}

# Build gpu-memory-service wheel → /opt/dynamo/dist/gpu_memory_service*.whl
# (small C++ extension, fast build -- emitted for CUDA devices, all targets).
{% if device == "cuda" %}
# The C++ extension only needs Python headers, no CUDA/torch.
# The wheel is built only when ENABLE_GPU_MEMORY_SERVICE is exactly "true".
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/uv \
    [ "$ENABLE_GPU_MEMORY_SERVICE" != "true" ] || { \
        export UV_CACHE_DIR=/root/.cache/uv && \
        source ${VIRTUAL_ENV}/bin/activate && \
        uv build --wheel --out-dir /opt/dynamo/dist /opt/dynamo/lib/gpu_memory_service; \
    }
{% endif %}


##################################
##### wheel_builder ##############
##################################
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Runtime templates COPY from this stage.

FROM wheel_builder_base AS wheel_builder

# Build arguments for the native nixl build.
ARG TARGETARCH
ARG DEVICE
ARG NIXL_REF
ARG USE_SCCACHE
{% if device == "cuda" %}
ARG CUDA_MAJOR
{% endif %}

# Clone nixl at NIXL_REF, rename the wheel (nixl-cu<CUDA_MAJOR> for cuda,
# nixl-<DEVICE> otherwise), then configure with meson for the current DEVICE
# and build/install with ninja.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    { [ "$USE_SCCACHE" != "true" ] || eval $(/tmp/use-sccache.sh setup-env); } && \
    source ${VIRTUAL_ENV}/bin/activate && \
    git clone "https://github.com/ai-dynamo/nixl.git" && \
    cd nixl && \
    git checkout ${NIXL_REF} && \
    case "$DEVICE" in \
        cuda) PKG_NAME="nixl-cu${CUDA_MAJOR}" ;; \
        *)    PKG_NAME="nixl-${DEVICE}" ;; \
    esac && \
    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
    mkdir build && \
    case "$DEVICE" in \
        cuda) \
            meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
                -Dcudapath_lib="/usr/local/cuda/lib64" \
                -Dcudapath_inc="/usr/local/cuda/include" \
                -Ducx_path="/usr/local/ucx" \
                -Dlibfabric_path="/usr/local/libfabric" ;; \
        xpu) \
            meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
                -Ducx_path="/usr/local/ucx" ;; \
        cpu) \
            meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
                -Ducx_path="/usr/local/ucx" ;; \
    esac && \
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"

541
{% if device == "xpu" %}
{# XPU only supports x86_64; no ARCH_ALT ARG needed #}
ENV NIXL_PREFIX=/opt/intel/intel_nixl \
    NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins
{% elif device == "cpu" %}
# NOTE(review): the library directory is hard-coded to x86_64-linux-gnu — confirm
# that cpu builds are x86_64-only.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/plugins
{% else %}
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
{% endif %}

# Put the nixl and UCX library directories ahead of the existing search path.
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

# Register both nixl directories with the dynamic linker (one path per line).
RUN printf '%s\n' "$NIXL_LIB_DIR" "$NIXL_PLUGIN_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

562
# Build NIXL wheel → /opt/dynamo/dist/nixl/nixl*.whl (C++ transport library, all targets)
# The wheel is built from the /workspace/nixl checkout for the requested PYTHON_VERSION.
ARG PYTHON_VERSION
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv \
           SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    { [ "$USE_SCCACHE" != "true" ] || eval $(/tmp/use-sccache.sh setup-env); } && \
    cd /workspace/nixl && \
    uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
574

575
{% if target not in ("dev", "local-dev") %}
# Copy source code (order matters for layer caching)
COPY .cargo/ /opt/dynamo/.cargo/
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build kvbm wheel (with nixl linkage via auditwheel repair).
# Runs only when ENABLE_KVBM=true. For cuda the wheel is built with the nccl
# feature and passed through auditwheel repair into /opt/dynamo/dist; for
# xpu/cpu the maturin output is copied there unchanged.
# Every step inside the ENABLE_KVBM branch is chained with && so that a failed
# `cd` stops the build instead of letting maturin run in the wrong directory.
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cargo/registry \
    --mount=type=cache,target=/root/.cargo/git \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64") && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env cmake); \
    fi && \
    mkdir -p ${CARGO_TARGET_DIR} && \
    source ${VIRTUAL_ENV}/bin/activate && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        KVBM_FEATURES="" && \
        if [ "$DEVICE" = "cuda" ]; then KVBM_FEATURES="--features nccl"; fi && \
        # KVBM_FEATURES is intentionally unquoted: empty means no extra arguments.
        maturin build --release ${KVBM_FEATURES} --out target/wheels && \
        if [ "$DEVICE" = "cuda" ]; then \
            auditwheel repair \
                --exclude libnixl.so \
                --exclude libnixl_build.so \
                --exclude libnixl_common.so \
                --exclude 'lib*.so*' \
                --plat manylinux_2_28_${ARCH_ALT} \
                --wheel-dir /opt/dynamo/dist \
                target/wheels/*.whl; \
        elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
            cp target/wheels/*.whl /opt/dynamo/dist/; \
        fi; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo KVBM"
{% endif %}

618
619
# Consolidate all wheels from the runtime wheel builder stage: the contents of
# runtime_wheel_builder's /opt/dynamo/dist/ are copied into this stage's
# /opt/dynamo/dist/, next to the wheels built in this stage.
COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/