"examples/multimodal_v1/connect/__init__.py" did not exist on "1f07dab7bd8b0563893f820d725ca980dfb9e49f"
wheel_builder.Dockerfile 22.2 KB
Newer Older
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/wheel_builder.Dockerfile ===
##################################
##### Wheel Build Image ##########
##################################

{% if platform == "multi" and device == "cuda" %}
# Multi-arch CUDA builds: declare one manylinux_2_28 stage per architecture, each
# with an explicit --platform so it is always pulled as the matching native image
# regardless of the current TARGETPLATFORM. BuildKit only fetches and builds the
# stage that manylinux_${TARGETARCH} resolves to; the other is a no-op for each
# per-platform sub-build.
# NOTE(review): both images are referenced without a tag, so they float on the
# registry's default tag; consider pinning a dated tag or digest for reproducibility.
FROM --platform=linux/amd64 quay.io/pypa/manylinux_2_28_x86_64 AS manylinux_amd64
FROM --platform=linux/arm64 quay.io/pypa/manylinux_2_28_aarch64 AS manylinux_arm64
{% endif %}

##################################
##### wheel_builder_base #########
##################################
# Shared base for all wheel builds: tools, system deps, and native libraries (except nixl).

{% if platform == "multi" and device == "cuda" %}
# Multi-arch CUDA: resolve to the manylinux_amd64 / manylinux_arm64 stage matching TARGETARCH.
FROM manylinux_${TARGETARCH} AS wheel_builder_base
{% else %}
# WHEEL_BUILDER_IMAGE is not declared in this template; it is expected to be a
# global ARG defined before the first FROM of the rendered Dockerfile.
FROM ${WHEEL_BUILDER_IMAGE} AS wheel_builder_base
{% endif %}

# Redeclare ARGs for this stage
ARG TARGETARCH
ARG CARGO_BUILD_JOBS
ARG DEVICE

# All relative paths in the following instructions resolve against /workspace
WORKDIR /workspace

{% if device == "cuda" %}
# Copy the CUDA toolkit and the HPC-X linker configuration from the dynamo_base stage
COPY --from=dynamo_base /usr/local/cuda /usr/local/cuda
COPY --from=dynamo_base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
{% endif %}

# Set environment variables first so they can be used in COPY commands
# (CARGO_BUILD_JOBS falls back to 16 when the build arg is unset or empty).
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PATH=/usr/local/cargo/bin:$PATH

# Copy the rustup and cargo homes from the dynamo_base stage to the same paths here
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME

{% if device == "xpu" %}
# XPU builds use apt. Switch to bash with pipefail so a failed download in the
# key-import pipeline below aborts the build instead of being masked by the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Register the Intel oneAPI apt repository (with its signing key) and the Intel graphics PPA
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
    add-apt-repository -y ppa:kobuk-team/intel-graphics

# Fetch the UCX patch (pinned to a fixed commit) that the UCX build applies for XPU
RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch

# General tooling plus the Intel GPU runtime. Uses apt-get throughout: the "apt"
# front end has no stable CLI for scripts. "--with-new-pkgs" keeps the behaviour
# of the former "apt upgrade", which may install new dependencies.
RUN apt-get clean && apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \
    curl \
    #ffmpeg \
    ca-certificates \
    zip \
    unzip \
    git \
    libsndfile1 \
    libsm6 \
    libxext6 \
    libgl1 \
    lsb-release \
    libaio-dev \
    numactl \
    wget \
    vim \
    linux-libc-dev && \
    # Install Intel GPU runtime packages
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get upgrade -y --with-new-pkgs && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd  \
    libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # NIXL build dependencies
        autoconf \
        automake \
        cmake \
        git-lfs \
        libtool \
        meson \
        net-tools \
        ninja-build \
        pybind11-dev \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# RDMA/InfiniBand userspace packages; --reinstall forces them to be installed again
# even when already present in the base image
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall --no-install-recommends \
        libibverbs-dev \
        rdma-core \
        ibverbs-utils \
        libibumad-dev \
        libnuma-dev \
        librdmacm-dev \
        ibverbs-providers \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
{% endif %}

{% if device == "cuda" %}
# Install system dependencies
# Cache dnf downloads; sharing=locked avoids dnf/rpm races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    dnf install -y almalinux-release-synergy && \
    dnf config-manager --set-enabled powertools && \
    dnf install -y \
        # Autotools (required for UCX, libfabric ./autogen.sh and ./configure)
        autoconf \
        automake \
        libtool \
        make \
        # RPM build tools (required for gdrcopy's build-rpm-packages.sh)
        rpm-build \
        rpm-sign \
        # Build tools
        cmake \
        ninja-build \
        clang-devel \
        # Install GCC toolset 14 (CUDA compatible, max version 14)
        gcc-toolset-14-gcc \
        gcc-toolset-14-gcc-c++ \
        gcc-toolset-14-binutils \
        flex \
        wget \
        # Kernel module build dependencies
        dkms \
        # Protobuf support
        protobuf-compiler \
        # RDMA/InfiniBand support (required for UCX build with --with-verbs)
        libibverbs \
        libibverbs-devel \
        rdma-core \
        rdma-core-devel \
        libibumad \
        libibumad-devel \
        librdmacm-devel \
        numactl-devel \
        # Libfabric support
        hwloc \
        hwloc-devel \
        libcurl-devel \
        openssl-devel \
        libuuid-devel \
        zlib-devel

# Set GCC toolset 14 as the default compiler (CUDA requires GCC <= 14)
ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
    CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
    CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
{% endif %}

# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional).
# Downloads the pinned upstream release archive for the target architecture, replaces
# any protoc already at /usr/local/bin or /usr/bin, and fails the build on an
# unsupported TARGETARCH instead of silently assuming aarch64.
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    # Map Docker's TARGETARCH to the architecture suffix used by protobuf release archives
    case "${TARGETARCH}" in \
      amd64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
      arm64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
      *) echo "Unsupported architecture: ${TARGETARCH}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    # Quote the member pattern so unzip, not the shell, expands it
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc 'include/*'; \
    # Remove the archive in the same layer so it does not persist in the image
    rm -f /tmp/protoc.zip; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version

# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc

{% if device == "xpu" %}
# Install uv package manager
# NOTE(review): the uv image is referenced by the floating :latest tag, so the uv
# version can change between builds; consider pinning a release tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH
{% else %}
# Non-XPU builds: expose the CUDA toolkit on PATH and the library search path
ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}

# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    # Quote the extras spec so the shell never treats [patchelf] as a glob pattern
    uv pip install --upgrade meson pybind11 patchelf "maturin[patchelf]" tomlkit

# Git ref of UCX that the UCX build step checks out
ARG NIXL_UCX_REF

{% if device == "cuda" %}
# Git branch or tag of gdrcopy to build (passed to git clone --branch)
ARG NIXL_GDRCOPY_REF

# Build and install gdrcopy
# The RPMs are produced by the upstream packaging script and installed in place;
# ARCH_ALT is the RPM architecture name (x86_64 or aarch64) derived from TARGETARCH.
RUN ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64") && \
    git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
{% endif %}

# sccache binary is pre-installed in dynamo_base; stage it off-PATH so
# Meson doesn't auto-detect it as a CUDA compiler launcher
# (https://github.com/mesonbuild/meson/issues/11118).
# When USE_SCCACHE=true the RUN below symlinks it onto PATH before install.
COPY --from=dynamo_base /usr/local/bin/sccache /opt/sccache/sccache

ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
        ln -s /opt/sccache/sccache /usr/local/bin/sccache && \
        /tmp/use-sccache.sh install; \
    fi

# Set SCCACHE environment variables (RUSTC_WRAPPER is set dynamically by
# setup-env only when the sccache server starts successfully).
# The ${USE_SCCACHE:+...} form leaves both values empty when USE_SCCACHE is unset
# or empty; any non-empty value passes the bucket and region through.
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}

# Always build FFmpeg so libs are available for Rust checks in CI
# Do not delete the source tarball for legal reasons
# (the tarball and extracted tree are moved to /usr/local/src/ffmpeg at the end).
ARG FFMPEG_VERSION
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    # Install pkg-config with the platform's package manager. The xpu steps are
    # chained with && so a failed install fails the build instead of being masked
    # by the later cleanup commands.
    if [ "$DEVICE" = "xpu" ]; then \
        apt-get update -y && apt-get install -y pkg-config && \
        apt-get clean && rm -rf /var/lib/apt/lists/*; \
    elif [ "$DEVICE" = "cuda" ]; then \
        dnf install -y pkg-config; \
    fi && \
    cd /tmp && \
    curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    cd ffmpeg-${FFMPEG_VERSION} && \
    ./configure \
        --prefix=/usr/local \
        --disable-gpl \
        --disable-nonfree \
        --disable-programs \
        --disable-doc \
        --disable-static \
        --disable-x86asm \
        --disable-postproc \
        --disable-network \
        --disable-encoders \
        --disable-muxers \
        --disable-bsfs \
        --disable-devices \
        --disable-libdrm \
        --enable-shared && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "FFMPEG" && \
    ldconfig && \
    mkdir -p /usr/local/src/ffmpeg && \
    mv /tmp/ffmpeg-${FFMPEG_VERSION}* /usr/local/src/ffmpeg/

# Build and install UCX
# Checks out NIXL_UCX_REF and installs into /usr/local/ucx. XPU builds first apply
# /tmp/ucx.patch and configure with --with-ze and --without-cuda; CUDA builds
# configure with CUDA and gdrcopy support instead.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    if [ "$DEVICE" = "xpu" ]; then \
        git apply --ignore-whitespace /tmp/ucx.patch; \
    fi && \
    ./autogen.sh && \
    if [ "$DEVICE" = "xpu" ]; then \
        ./contrib/configure-release \
            --prefix=/usr/local/ucx \
            --with-ze \
            --enable-shared \
            --disable-static \
            --disable-doxygen-doc \
            --enable-optimizations \
            --enable-cma \
            --enable-devel-headers \
            --with-verbs \
            --with-dm \
            --with-efa \
            --without-cuda \
            --enable-mt; \
    elif [ "$DEVICE" = "cuda" ]; then \
        ./contrib/configure-release \
            --prefix=/usr/local/ucx \
            --enable-shared \
            --disable-static \
            --disable-doxygen-doc \
            --enable-optimizations \
            --enable-cma \
            --enable-devel-headers \
            --with-cuda=/usr/local/cuda \
            --with-verbs \
            --with-dm \
            --with-gdrcopy=/usr/local \
            --with-efa \
            --enable-mt; \
    fi && \
    # Bound parallelism to the CPU count: a bare "make -j" places no limit on the
    # number of concurrent jobs. This matches the other native builds in this file.
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    /tmp/use-sccache.sh show-stats "UCX" && \
    # Register the UCX library directories with the dynamic linker
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig

{% if device == "cuda" %}
# Build and install libfabric at NIXL_LIBFABRIC_REF into /usr/local/libfabric.
# The configure flags disable the verbs, psm3, opx, usnic and rstream providers and
# enable EFA; CUDA and gdrcopy support use the dlopen variants.
ARG NIXL_LIBFABRIC_REF
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/ofiwg/libfabric.git && \
    cd libfabric && \
    git checkout $NIXL_LIBFABRIC_REF && \
    ./autogen.sh && \
    ./configure --prefix="/usr/local/libfabric" \
                --disable-verbs \
                --disable-psm3 \
                --disable-opx \
                --disable-usnic \
                --disable-rstream \
                --enable-efa \
                --with-cuda=/usr/local/cuda \
                --enable-cuda-dlopen \
                --with-gdrcopy \
                --enable-gdrcopy-dlopen && \
    make -j$(nproc) && \
    make install && \
    /tmp/use-sccache.sh show-stats "LIBFABRIC" && \
    # Register the libfabric library directory with the dynamic linker
    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
    ldconfig
{% endif %}

{% if framework == "vllm" and device == "cuda" %}
# Build and install AWS SDK C++ (required for NIXL OBJ backend / S3 support)
# Only the "s3" component is built (BUILD_ONLY), as shared libraries under /usr/local;
# the source checkout is removed in the same layer.
ARG AWS_SDK_CPP_VERSION=1.11.760
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env cmake); \
    fi && \
    git clone --recurse-submodules --depth 1 --branch ${AWS_SDK_CPP_VERSION} \
        https://github.com/aws/aws-sdk-cpp.git /tmp/aws-sdk-cpp && \
    mkdir -p /tmp/aws-sdk-cpp/build && \
    cd /tmp/aws-sdk-cpp/build && \
    cmake .. \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_ONLY="s3" \
        -DENABLE_TESTING=OFF \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DBUILD_SHARED_LIBS=ON && \
    make -j$(nproc) && \
    make install && \
    cd / && \
    rm -rf /tmp/aws-sdk-cpp && \
    ldconfig && \
    /tmp/use-sccache.sh show-stats "AWS SDK C++"
{% endif %}


##################################
##### runtime_wheel_builder ######
##################################
# Builds ai-dynamo, ai-dynamo-runtime, and gpu_memory_service wheels, sans nixl.

FROM wheel_builder_base AS runtime_wheel_builder

{% if target not in ("dev", "local-dev") %}
# Copy source code (order matters for layer caching)
COPY .cargo/ /opt/dynamo/.cargo/
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build ai-dynamo (pure Python) and ai-dynamo-runtime (maturin) wheels.
# Both wheels are written to /opt/dynamo/dist. The maturin build always enables the
# "kv-indexer" feature and adds "media-ffmpeg" when ENABLE_MEDIA_FFMPEG=true.
ARG USE_SCCACHE
ARG ENABLE_MEDIA_FFMPEG
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cargo/registry \
    --mount=type=cache,target=/root/.cargo/git \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env cmake); \
    fi && \
    mkdir -p ${CARGO_TARGET_DIR} && \
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
    uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
        maturin build --release --features "media-ffmpeg,kv-indexer" --out /opt/dynamo/dist; \
    else \
        maturin build --release --features "kv-indexer" --out /opt/dynamo/dist; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo Runtime"

{% else %}
# Dev/local-dev targets do not have pre-built wheels or /workspace source code.
# After you start the local-dev/dev container, you will need to build from source:
#   cargo build --features dynamo-llm/block-manager
#   cd /workspace/lib/bindings/python && maturin develop --uv && cd /workspace
#   uv pip install --no-deps -e /workspace
# See container/launch_message/dev.txt for the full setup steps.

# Create dist dir with a placeholder so downstream COPY --from=wheel_builder /opt/dynamo/dist/*.whl always has a match.
RUN mkdir -p /opt/dynamo/dist ${CARGO_TARGET_DIR} && \
    touch /opt/dynamo/dist/.placeholder.whl

# Dev/local-dev skip the full COPY lib/ above, so copy gpu_memory_service source explicitly for the wheel build below
COPY lib/gpu_memory_service/ /opt/dynamo/lib/gpu_memory_service/
{% endif %}

# Build gpu-memory-service wheel → /opt/dynamo/dist/gpu_memory_service*.whl (small C++ extension, fast build).
# This step is rendered for every target but only for CUDA devices, and the wheel is
# built only when ENABLE_GPU_MEMORY_SERVICE=true; otherwise the RUN is a no-op.
{% if device == "cuda" %}
# Build gpu_memory_service wheel (C++ extension only needs Python headers, no CUDA/torch)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
        export UV_CACHE_DIR=/root/.cache/uv && \
        source ${VIRTUAL_ENV}/bin/activate && \
        uv build --wheel --out-dir /opt/dynamo/dist /opt/dynamo/lib/gpu_memory_service; \
    fi
{% endif %}


##################################
##### wheel_builder ##############
##################################
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Runtime templates COPY from this stage.

FROM wheel_builder_base AS wheel_builder

# Build and install nixl
# The source is cloned into the current WORKDIR and checked out at NIXL_REF. The
# wheel name and install prefix depend on DEVICE: nixl-cu${CUDA_MAJOR} under
# /opt/nvidia/nvda_nixl for cuda, nixl-xpu under /opt/intel/intel_nixl for xpu.
ARG TARGETARCH
ARG DEVICE
ARG NIXL_REF
ARG USE_SCCACHE
{% if device == "cuda" %}
ARG CUDA_MAJOR
{% endif %}

RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    source ${VIRTUAL_ENV}/bin/activate && \
    git clone "https://github.com/ai-dynamo/nixl.git" && \
    cd nixl && \
    git checkout ${NIXL_REF} && \
    if [ "$DEVICE" = "cuda" ]; then \
        PKG_NAME="nixl-cu${CUDA_MAJOR}"; \
    elif [ "$DEVICE" = "xpu" ]; then \
        PKG_NAME="nixl-xpu"; \
    fi && \
    # Rewrite the wheel name in pyproject.toml before configuring the build
    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
    mkdir build && \
    if [ "$DEVICE" = "cuda" ]; then \
        meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
            -Dcudapath_lib="/usr/local/cuda/lib64" \
            -Dcudapath_inc="/usr/local/cuda/include" \
            -Ducx_path="/usr/local/ucx" \
            -Dlibfabric_path="/usr/local/libfabric"; \
    elif [ "$DEVICE" = "xpu" ]; then \
        meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
            -Ducx_path="/usr/local/ucx"; \
    fi && \
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"

{% if device == "xpu" %}
{# XPU only supports x86_64; no ARCH_ALT ARG needed #}
ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
    NIXL_PREFIX=/opt/intel/intel_nixl
{% else %}
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl
{% endif %}

# Put the NIXL and UCX library directories ahead of the existing search path
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

# Also register the NIXL directories with the dynamic linker cache
RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

# Build NIXL wheel → /opt/dynamo/dist/nixl/nixl*.whl (C++ transport library, all targets)
# Runs from the /workspace/nixl checkout created by the native nixl build above.
ARG PYTHON_VERSION
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    cd /workspace/nixl && \
    uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION

{% if target not in ("dev", "local-dev") %}
# Copy source code (order matters for layer caching)
COPY .cargo/ /opt/dynamo/.cargo/
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build kvbm wheel (with nixl linkage via auditwheel repair)
# Built only when ENABLE_KVBM=true. On cuda the wheel is passed through auditwheel
# repair with the listed libraries excluded and tagged manylinux_2_28_<arch>; on xpu
# the maturin output is copied to /opt/dynamo/dist unchanged.
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cargo/registry \
    --mount=type=cache,target=/root/.cargo/git \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${TARGETARCH}} && \
    ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64") && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env cmake); \
    fi && \
    mkdir -p ${CARGO_TARGET_DIR} && \
    source ${VIRTUAL_ENV}/bin/activate && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        maturin build --release --out target/wheels && \
        if [ "$DEVICE" = "cuda" ]; then \
            auditwheel repair \
                --exclude libnixl.so \
                --exclude libnixl_build.so \
                --exclude libnixl_common.so \
                --exclude 'lib*.so*' \
                --plat manylinux_2_28_${ARCH_ALT} \
                --wheel-dir /opt/dynamo/dist \
                target/wheels/*.whl; \
        elif [ "$DEVICE" = "xpu" ]; then \
            cp target/wheels/*.whl /opt/dynamo/dist/; \
        fi; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo KVBM"
{% endif %}

# Consolidate all wheels from the runtime wheel builder stage: copy the contents of
# its /opt/dynamo/dist/ directory into this stage's /opt/dynamo/dist/.
COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/