Dockerfile.tensorrt_llm 7.21 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

16
17
ARG BASE_IMAGE="tensorrt_llm/release"
ARG BASE_IMAGE_TAG="latest"
18
19
ARG MANYLINUX_IMAGE="quay.io/pypa/manylinux_2_28_x86_64"
ARG RELEASE_BUILD
20

21
22
23
24
25
##################################
########## Build Image ###########
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
26
27
28
29
30
31

USER root

# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
32
33
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && rm nats-server-v2.10.24-amd64.deb
34
35
36
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
37
38
39
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
ENV PATH=/usr/local/bin/etcd/:$PATH

# TODO: Try using uv to install tensorrtllm
ARG TENSORRTLLM_PIP_WHEEL_PATH=""
COPY ${TENSORRTLLM_PIP_WHEEL_PATH}/*.whl /tmp/
RUN find /tmp -name "*.whl" -exec pip install {} +

# Install genai-perf for benchmarking
# TODO: Move to tag when fix for genai-perf will be released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

# Rust build/dev dependencies
RUN apt-get update && \
68
69
70
71
72
73
74
75
76
77
    apt-get install --no-install-recommends -y \
    gdb \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
78
    RUST_VERSION=1.86.0 \
79
80
81
82
83
84
85
86
    RUSTARCH=x86_64-unknown-linux-gnu

RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
87
88
89
90
91
92
93
94

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
95
96
97
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
98

99
ARG CARGO_BUILD_JOBS
100
101
102
103
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
104

105
ENV CARGO_TARGET_DIR=/workspace/target
106

107
108
# Build Rust
COPY lib/ /workspace/lib/
109
110
COPY components /workspace/components
COPY launch /workspace/launch
111

112
RUN cargo build --release --locked --features mistralrs,sglang,python && \
113
    cargo doc --no-deps && \
114
115
116
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
117
118
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin
119

120
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
121
122
123

# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

### VIRTUAL ENVIRONMENT SETUP ###
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

###################################
####### WHEEL BUILD STAGE #########
###################################

# Build the wheel in the manylinux environment
FROM ${MANYLINUX_IMAGE} AS wheel_builder
ARG RELEASE_BUILD
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

WORKDIR /workspace

RUN yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

COPY --from=build /workspace /workspace

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    CARGO_TARGET_DIR=/workspace/target

COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME

# Create virtualenv and build dynamo wheel
159
160
161
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12 && \
    source /opt/dynamo/venv/bin/activate && \
162
    cd /workspace/lib/bindings/python && \
163
164
165
166
167
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi && \
168
    cd /workspace && \
169
170
171
172
173
174
175
176
177
178
    uv build --wheel --out-dir /workspace/dist

########################################
########## Development Image ###########
########################################

FROM build AS dev

WORKDIR /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
179
180
181
182

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
183
    cp dist/ai_dynamo_runtime*cp312*.whl /opt/dynamo/bindings/wheels/. && \
184
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
185
186
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

187
188
189
190
RUN . /opt/dynamo/venv/bin/activate && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

191
192
193
# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
194
195
RUN pip install dist/ai_dynamo_runtime*cp312*.whl  && \
    pip install dist/ai_dynamo*any.whl
196
197
198

# Tell TRTLLM worker to use the Dynamo LLM C API for KV Cache Routing
ENV DYNAMO_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
199
ENV DYNAMO_HOME=/workspace
200
201
202
203
204
205

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
206
CMD []