Dockerfile.tensorrt_llm 9.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

16
17
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.03-py3"
18
ARG RELEASE_BUILD
19

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

35
36
37
38
39
##################################
########## Build Image ###########
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
40

41
42
43
44
# Redeclare ARCH and ARCH_ALT so they're available in this build stage
ARG ARCH
ARG ARCH_ALT

45
46
47
48
USER root

# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
49

50
# nats
51
52
53
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb

54
55
# etcd
ENV ETCD_VERSION="v3.5.18"
56
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
57
58
59
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
60
61
ENV PATH=/usr/local/bin/etcd/:$PATH

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

COPY --from=trtllm_wheel . /trtllm_wheel/

# TODO: Currently, ABI compatibility issues with TRTLLM wheel and NGC PyTorch prevent us
# from using the TRTLLM wheel in a uv venv. Once the issues are resolved, we can
# use uv to install TensorRT-LLM wheel within the uv venv.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        # Install from local wheel directory in build context
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
        if [ -n "$WHEEL_FILE" ]; then \
            pip install "$WHEEL_FILE"; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
    else \
         # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
         pip install --index-url "${TENSORRTLLM_INDEX_URL}" \
         --extra-index-url https://pypi.org/simple \
         "${TENSORRTLLM_PIP_WHEEL}" ; \
    fi
87
88

# Install genai-perf for benchmarking
Hongkuan Zhou's avatar
Hongkuan Zhou committed
89
90
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

# Rust build/dev dependencies
RUN apt-get update && \
109
110
111
112
113
    apt-get install --no-install-recommends -y \
    gdb \
    protobuf-compiler \
    cmake \
    libssl-dev \
114
115
    pkg-config \
    libclang-dev
116
117
118
119

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
120
    RUST_VERSION=1.86.0
121

122
123
124
125
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
126
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
127
    # TODO: Add SHA check back based on RUSTARCH
128
129
130
131
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
132

133
ARG CARGO_BUILD_JOBS
134
135
136
137
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
138

139
ENV CARGO_TARGET_DIR=/workspace/target
140
141
142

# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
143
144
145
146
147
148
149
150
151

### VIRTUAL ENVIRONMENT SETUP ###
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

###################################
####### WHEEL BUILD STAGE #########
###################################

152
153
154
155
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
156
157
158
159
160
161
162
163
164
ARG RELEASE_BUILD
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

WORKDIR /workspace

165
RUN yum update -y \
166
    && yum install -y python3.12-devel \
167
    && yum install -y protobuf-compiler \
168
169
170
171
172
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
173
174
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv
175
176
177

COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME
178
179
180
COPY --from=build /workspace /workspace
COPY --from=build $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
181

182
183
184
185
186
187
188
189
190
191
192
193
194
# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
195
COPY deploy/sdk /workspace/deploy/sdk
196
197

# Build Rust crate binaries packaged with the wheel
198
RUN cargo build --release --locked --features mistralrs,python \
199
200
201
202
203
204
    -p dynamo-run \
    -p llmctl \
    # Multiple http named crates are present in dependencies, need to specify the path
    -p file://$PWD/components/http \
    -p metrics

205
# Build dynamo wheels
206
RUN uv build --wheel --out-dir /workspace/dist && \
207
    cd /workspace/lib/bindings/python && \
208
209
210
211
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
212
    fi
213
214
215
216
217
218
219
220
221

########################################
########## Development Image ###########
########################################

FROM build AS dev

WORKDIR /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
222
223
224
225
226
227
228
229
230
231
232
233
234
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY . /workspace

# Build rest of the crates
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics
235
236
237
238

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
239
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
240
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
241
242
243
244
245
246
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.  && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin
247

248
# Install wheels
249
250
251
252
RUN . /opt/dynamo/venv/bin/activate && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

253
# Install dynamo.runtime and dynamo.llm wheels globally in container for tests
254
# TODO: In future, we may use a virtualenv for everything and remove this.
255
256
RUN pip install dist/ai_dynamo_runtime*cp312*.whl  && \
    pip install dist/ai_dynamo*any.whl
257

258
ENV DYNAMO_HOME=/workspace
259

260
261
# Use UCX for TRTLLM KV Cache Transfer
ENV TRTLLM_USE_UCX_KVCACHE=1
262

263
264
265
266
267
268

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

269
270
271

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
272
CMD []