# Dockerfile.trtllm
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Image (and tag) the build/dev stages start from.
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.06-py3"
# When set to "true", the wheel_builder stage also builds the Python
# bindings wheel for Python 3.11 and 3.10.
ARG RELEASE_BUILD
# Image (and tag) the runtime stage starts from.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"

# Architecture selectors so this file can build for both x86 and aarch64.
#   ARCH:     spelling used in package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: spelling used for Rust targets and the manylinux suffix
#             (e.g., x86_64, aarch64)
#
# The defaults target x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, pass both explicitly:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There is no easy way to derive one value from the other without
# adding if statements everywhere, so both are plain ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

##################################
########## Build Image ###########
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build

# ARGs declared before the first FROM are only visible to FROM lines;
# redeclare ARCH and ARCH_ALT so instructions in this stage can read them.
ARG ARCH
ARG ARCH_ALT

# Git refs checked out for the UCX and NIXL source builds below.
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1

# NIXL source checkout, install prefix, and the library/plugin directories
# under that prefix. Both directories are prepended to LD_LIBRARY_PATH.
ENV NIXL_SRC_DIR=/opt/nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH

USER root

# Install utilities
# - apt-get is used rather than apt, whose CLI is not meant for scripts.
# - DEBIAN_FRONTEND is set inline so package prompts cannot stall the build
#   and the setting does not leak into the image environment.
# - The apt package lists are removed in the same layer; later installs in
#   this file run their own `apt-get update`.
# - Recommended packages are intentionally still installed here, as before.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
    git \
    wget \
    curl \
    nvtop \
    tmux \
    vim \
    ## NIXL dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    ## support UCX to establish connections with zmq
    libzmq3-dev \
    # These headers are missing with the hpcx installer, required
    # by UCX to find RDMA devices
    libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
    libnuma-dev librdmacm-dev ibverbs-providers && \
    rm -rf /var/lib/apt/lists/*

### UCX EFA Setup ###
# Remove the UCX copies at /opt/hpcx/ucx and /usr/local/ucx, then build UCX
# from source at NIXL_UCX_REF and install it into /usr/local/ucx.
# - make is capped at $(nproc) jobs; a bare `-j` places no limit on the
#   number of parallel jobs and can exhaust memory on large hosts.
# - The source/build tree is deleted in the same layer once installed, so it
#   does not add to the image size.
RUN rm -rf /opt/hpcx/ucx && \
    rm -rf /usr/local/ucx && \
    echo "Building UCX with reference $NIXL_UCX_REF" && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout "$NIXL_UCX_REF" && \
    ./autogen.sh && ./configure \
    --prefix=/usr/local/ucx \
    --enable-shared \
    --disable-static \
    --disable-doxygen-doc \
    --enable-optimizations \
    --enable-cma \
    --enable-devel-headers \
    --with-cuda=/usr/local/cuda \
    --with-verbs \
    --with-efa \
    --with-dm \
    --with-gdrcopy=/usr/local \
    --enable-mt && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig && \
    cd /usr/local/src && \
    rm -rf ucx

# Put /usr/lib and the UCX install directories ahead of whatever is already
# on LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
# RUN instructions after this point execute under bash instead of /bin/sh.
SHELL ["/bin/bash", "-c"]

### NIXL SETUP ###
# Clone the NIXL sources into NIXL_SRC_DIR at NIXL_REF, then configure,
# build and install them into NIXL_PREFIX with meson + ninja.
# TEMP: disable gds backend for arm64
RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
    cd ${NIXL_SRC_DIR} && \
    git checkout ${NIXL_REF} && \
    if [ "$ARCH" = "arm64" ]; then \
        nixl_build_args="-Ddisable_gds_backend=true"; \
    else \
        nixl_build_args=""; \
    fi && \
    mkdir build && \
    meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
    cd build/ && \
    ninja && \
    ninja install

# nats
# Download the nats-server v2.10.28 .deb for ARCH, install it with dpkg, and
# delete the downloaded package in the same layer.
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb

# etcd
# Download the etcd release tarball for ARCH and unpack it (without its
# top-level directory) into /usr/local/bin/etcd, which is then put on PATH.
# The download uses the same retry flags as the other wget calls in this file.
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

# TensorRT-LLM install source:
#   HAS_TRTLLM_CONTEXT=1 -> install the first *.whl found under /trtllm_wheel
#   otherwise            -> pip install TENSORRTLLM_PIP_WHEEL, with
#                           TENSORRTLLM_INDEX_URL as an extra index
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

# `trtllm_wheel` is not a stage in this file, so it has to be supplied from
# outside (presumably a named build context -- confirm against the build script).
COPY --from=trtllm_wheel . /trtllm_wheel/

# TODO: Currently, ABI compatibility issues with TRTLLM wheel and NGC PyTorch prevent us
# from using the TRTLLM wheel in a uv venv. Once the issues are resolved, we can
# use uv to install TensorRT-LLM wheel within the uv venv.
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
#
# Steps are chained with `&&` so that a failed TensorRT-LLM install fails the
# build instead of being masked by a later successful command. The triton pin
# is applied once, after either install path, on amd64 only.
RUN if [ -f /etc/pip/constraint.txt ]; then : > /etc/pip/constraint.txt; fi && \
    pip uninstall -y tensorrt && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        # Install from local wheel directory in build context
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1) && \
        if [ -z "$WHEEL_FILE" ]; then \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi && \
        pip install "$WHEEL_FILE"; \
    else \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
    fi && \
    if [ "$ARCH" = "amd64" ]; then \
        pip install "triton==3.3.1"; \
    fi

# Test-only Python dependencies. The requirements file is bind-mounted for
# this one instruction, so it is never copied into an image layer.
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install -r /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Run pyright once at build time, discarding its output, to finish its install.
RUN pyright --help >/dev/null 2>&1

# Write root's git config so /workspace is listed as a safe directory,
# which enables Git operations there.
RUN printf '%s\n' "[safe]" "      directory=/workspace" > /root/.gitconfig

# Rust build/dev dependencies
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
    gdb \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config \
    libclang-dev && \
    rm -rf /var/lib/apt/lists/*

# Rust toolchain locations and version; cargo's bin directory goes on PATH.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.89.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT:
# download rustup-init 1.28.1 for that target, install RUST_VERSION as the
# default toolchain, remove the installer, and make RUSTUP_HOME / CARGO_HOME
# writable by all users.
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

# Cargo build output goes under /workspace/target.
ENV CARGO_TARGET_DIR=/workspace/target

# Install uv by copying the uv and uvx binaries from the upstream image.
# The image tag is pinned so rebuilds are reproducible; bump it deliberately.
COPY --from=ghcr.io/astral-sh/uv:0.8.4 /uv /uvx /bin/

### VIRTUAL ENVIRONMENT SETUP ###
# Create a Python 3.12 virtual environment with uv and export its location.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12
ENV VIRTUAL_ENV=/opt/dynamo/venv

# Install NIXL Python module
# Builds the NIXL wheel from NIXL_SRC_DIR into /workspace/wheels/nixl, then
# installs it twice: with `uv pip` and with the system pip.
# TODO: Move gds_path selection based on arch into NIXL build
# TEMP: disable gds backend for arm64
RUN cd ${NIXL_SRC_DIR} && \
    if [ "$ARCH" = "arm64" ]; then \
        uv build . --out-dir /workspace/wheels/nixl \
        --config-settings=setup-args="-Ddisable_gds_backend=true"; \
    else \
        uv build . --out-dir /workspace/wheels/nixl; \
    fi && \
    # Install the wheel
    # TODO: Move NIXL wheel install to the wheel_builder stage
    uv pip install /workspace/wheels/nixl/*.whl && \
    pip install /workspace/wheels/nixl/*.whl

###################################
####### WHEEL BUILD STAGE #########
###################################

# NOTE: ${ARCH_ALT} in the FROM line below is resolved from the ARG declared
# before the first FROM. This redeclaration still belongs to the preceding
# stage and does not feed the FROM; it is kept so the file is unchanged for
# existing builds.
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG RELEASE_BUILD
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl

WORKDIR /workspace

# Install build packages, then drop the yum caches in the same layer.
RUN yum update -y \
    && yum install -y llvm-toolset python3.12-devel protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

# Same locations as in the build stage, so the trees copied below land at
# identical paths.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

# Bring the Rust toolchain, the NIXL install, /workspace and the virtual
# environment over from the build stage.
COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME
COPY --from=build $NIXL_PREFIX $NIXL_PREFIX
COPY --from=build /workspace /workspace
COPY --from=build $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch

# Build the whole Cargo workspace in release mode against the committed
# Cargo.lock, with the dynamo-llm block-manager feature enabled.
RUN cargo build \
    --release \
    --locked \
    --features dynamo-llm/block-manager \
    --workspace

# Build dynamo wheels
# First the wheel for the project rooted at /workspace, then the Python
# bindings wheel for Python 3.12. When RELEASE_BUILD is "true", bindings
# wheels for Python 3.11 and 3.10 are built as well.
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi

########################################
########## Development Image ###########
########################################

FROM build AS dev

WORKDIR /workspace

# Bring in the wheel_builder workspace (sources, dist/ wheels, target/ output).
COPY --from=wheel_builder /workspace /workspace
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Package the bindings
# Creates the bindings directories, stages the cp312 ai_dynamo wheels in
# them, and installs the metrics binary into /usr/local/bin.
RUN mkdir -p /opt/dynamo/bindings/wheels /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/metrics /usr/local/bin

# Install wheels
# Into the virtual environment, via uv.
RUN . /opt/dynamo/venv/bin/activate && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Install dynamo.runtime and dynamo.llm wheels globally in container for tests
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN pip install dist/ai_dynamo_runtime*cp312*.whl && \
    pip install dist/ai_dynamo*any.whl

ENV DYNAMO_HOME=/workspace

# Copy launch banner
# Lines starting with "# " are stripped from the message; the result is
# saved to ~/.launch_screen and printed from ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace

# Redeclared so ARCH_ALT can be used by instructions in this stage.
ARG ARCH_ALT

ENV DYNAMO_HOME=/workspace
# NIXL paths mirror the build stage. LD_LIBRARY_PATH additionally covers the
# UCX and OpenMPI trees that are copied into this stage further down.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi

# Install apt dependencies
# openssh-client, openssh-server are needed for OpenMPI
# `ln -sf` is used so the step does not fail if /usr/bin/python already exists.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        python3-dev \
        python3-pip \
        # jq and curl for polling various endpoints and health checks
        jq \
        curl \
        # For debugging
        vim \
        # support UCX to establish connections with zmq
        libzmq3-dev \
        # install cudnn libs
        libcudnn9-cuda-12 \
        # Libraries required by UCX to find RDMA devices
        libibverbs1 rdma-core ibverbs-utils libibumad3 \
        libnuma1 librdmacm1 ibverbs-providers \
        openssh-client \
        openssh-server && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*

# Copy all bindings (wheels, lib, include) from dev image
COPY --from=dev /opt/dynamo/bindings /opt/dynamo/bindings

# Copy nats and etcd from build image
COPY --from=build /usr/bin/nats-server /usr/bin/nats-server
COPY --from=build /usr/local/bin/etcd/ /usr/local/bin/etcd/

# Copy UCX from build image as plugin for NIXL
COPY --from=build /usr/local/ucx /usr/local/ucx
# Copy the NIXL install prefix from wheel_builder image
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
# Copy OpenMPI from build image
COPY --from=build /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from build image
COPY --from=build /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/


# Common dependencies
# TODO: Remove extra install and use pyproject.toml to define all dependencies
# After installing, networkx/packaging/torch/triton are uninstalled again
# because NVIDIA's versions are used later in this stage.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    python3 -m pip install --no-cache-dir --break-system-packages --requirement /tmp/requirements.txt && \
    echo "uninstall (networkx packaging torch triton) as we will use NVIDIA's versions later" && \
    python3 -m pip uninstall --yes --break-system-packages networkx packaging torch triton

# Install test dependencies
# TODO: Remove this once we have a functional CI image built on top of the runtime image
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    python3 -m pip install --no-cache-dir --break-system-packages --requirement /tmp/requirements.txt

# CUDA compiler pieces taken from the build image: the nvcc, cudafe++, ptxas
# and fatbinary executables, the headers, the libcudart shared objects, and
# the nvvm tree.
COPY --from=build \
    /usr/local/cuda/bin/nvcc \
    /usr/local/cuda/bin/cudafe++ \
    /usr/local/cuda/bin/ptxas \
    /usr/local/cuda/bin/fatbinary \
    /usr/local/cuda/bin/
COPY --from=build /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=build /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=build /usr/local/cuda/nvvm /usr/local/cuda/nvvm

# Copy pytorch installation from NGC PyTorch
# The *_VER ARGs are used to spell the versioned *.dist-info directory names
# copied below, so they have to match what the build image actually ships.
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
ARG SETUPTOOLS_VER=78.1.1
ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
ARG JINJA2_VER=3.1.6
ARG NETWORKX_VER=3.5
ARG SYMPY_VER=1.14.0
ARG PACKAGING_VER=23.2
ARG FLASH_ATTN_VER=2.7.4.post1
ARG MPMATH_VER=1.3.0

COPY --from=build /usr/local/lib/lib* /usr/local/lib/
# libcupti is taken through the /usr/local/cuda/lib64 path, like libcudart and
# libcusparseLt elsewhere in this stage, instead of a hard-coded
# cuda-12.9/targets/x86_64-linux path that only exists for one CUDA version
# on x86_64.
COPY --from=build /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=build /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=build /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
COPY --from=build /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
COPY --from=build /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
COPY --from=build /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/


# Build arguments for the TensorRT-LLM install in this stage; the names and
# defaults match the ones declared in the build stage.
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

# Copy the NIXL and Dynamo wheels into the wheelhouse
COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
# Copy metrics binary from dev image, not part of ai-dynamo wheel
COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics

# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
# is also specified. So set the configurable index as a --extra-index-url for prioritization.
# NOTE: locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6. This
#       can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
#       and we upgrade to a published pip wheel containing this change.
#
# TensorRT-LLM is installed from the same source as in the build stage: when
# HAS_TRTLLM_CONTEXT=1 the local wheel is used (bind-mounted from the build
# stage's /trtllm_wheel for this instruction only); otherwise
# TENSORRTLLM_PIP_WHEEL is installed with TENSORRTLLM_INDEX_URL as an extra
# index. Previously HAS_TRTLLM_CONTEXT was ignored here, so a locally
# supplied wheel never reached the runtime image.
RUN --mount=type=bind,from=build,source=/trtllm_wheel,target=/trtllm_wheel \
    python3 -m pip install --no-cache-dir --break-system-packages "cuda-python>=12,<13" && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1) && \
        if [ -z "$WHEEL_FILE" ]; then \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi && \
        python3 -m pip install --no-cache-dir --break-system-packages "$WHEEL_FILE"; \
    else \
        python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
    fi && \
    python3 -m pip install --no-cache-dir --break-system-packages \
        /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
        /workspace/wheelhouse/ai_dynamo*any.whl \
        /workspace/wheelhouse/nixl*.whl && \
    python3 -m pip uninstall -y --break-system-packages triton

# triton was uninstalled above; copy it from the build (pytorch) image instead
COPY --from=build /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
COPY --from=build /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info

# Copy benchmarks, backends and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY tests /workspace/tests
COPY benchmarks /workspace/benchmarks
COPY components/backends/trtllm /workspace/components/backends/trtllm
RUN python3 -m pip install --no-cache-dir --break-system-packages /workspace/benchmarks

# Copy files for legal compliance
COPY ATTRIBUTION* LICENSE /workspace/

# Copy launch banner
# Lines starting with "# " are stripped from the message; the result is
# saved to ~/.launch_screen and printed from ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []