# Dockerfile.tensorrt_llm
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base image coordinates. Both ARGs are declared before FROM, so they are only
# in scope for the FROM line below; override them with --build-arg as needed.
ARG BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
ARG BASE_IMAGE_TAG="krish-fix-trtllm-build.23766174"

# Build stage "dev", based on the image selected above.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

# The build steps that follow (package installs, writes under /usr/local and
# /opt) run as root.
USER root

# Install utilities
# apt-get is used rather than apt because apt's command-line interface is not
# guaranteed stable for scripted use. The package index is deleted in the same
# layer so it does not end up in the image.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        git \
        nvtop \
        tmux \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*
# nats
# Download the pinned nats-server .deb, install it with dpkg, and delete the
# package file in the same layer so it is not kept in the image.
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && \
    rm nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
# Fetch the release tarball for ETCD_VERSION, unpack it (top-level directory
# stripped) into /usr/local/bin/etcd, and remove the tarball in the same layer.
RUN wget --tries=3 --waitretry=5 "https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-amd64.tar.gz" -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
# /usr/local/bin/etcd is a directory holding the unpacked release, so the
# directory itself is added to PATH.
ENV PATH=/usr/local/bin/etcd/:$PATH

# TODO: Try using uv to install tensorrtllm
# TENSORRTLLM_PIP_WHEEL_PATH is the build-context directory that holds the
# TensorRT-LLM wheel(s). With the empty default the COPY source becomes
# "/*.whl", i.e. wheels located at the root of the build context.
ARG TENSORRTLLM_PIP_WHEEL_PATH=""
COPY ${TENSORRTLLM_PIP_WHEEL_PATH}/*.whl /tmp/
# Install every wheel found under /tmp in a single pip invocation.
# --no-cache-dir keeps pip's download/build cache out of the image layer.
RUN find /tmp -name "*.whl" -exec pip install --no-cache-dir {} +

# Install genai-perf for benchmarking
# TODO: Move to tag when fix for genai-perf will be released
# GENAI_PERF_TAG is a commit SHA of the perf_analyzer repository; genai-perf is
# installed from that repository's genai-perf subdirectory.
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
# --no-cache-dir keeps pip's download/build cache out of the image layer.
RUN pip install --no-cache-dir "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
# The requirements file is bind-mounted from the build context for this step
# only, so it is not copied into the image. --no-cache-dir keeps pip's
# download/build cache out of the layer.
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --no-cache-dir --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
# NOTE(review): presumably the first pyright invocation performs a one-time
# setup step, so it is triggered here at build time — confirm. All output is
# discarded.
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
# Writes (overwriting any existing file) root's gitconfig with a [safe]
# section that lists /workspace as a safe directory.
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Point /bin/sh at bash so that later shell-form RUN steps can use bash
# builtins such as `source`.
RUN ln -sf /bin/bash /bin/sh

# Rust build/dev dependencies
# The package index is refreshed and deleted within this single layer so the
# install never uses a stale index and the index is not stored in the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
        cmake \
        gdb \
        libssl-dev \
        pkg-config \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Rust toolchain locations and pinned versions. rustup and cargo live under
# /usr/local, and cargo's bin directory is placed on PATH.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.85.0 \
    RUSTARCH=x86_64-unknown-linux-gnu

# Download rustup-init 1.28.1 for RUSTARCH, verify its SHA-256 before running
# it, install the minimal profile of RUST_VERSION, delete the installer, and
# make RUSTUP_HOME and CARGO_HOME writable by all users.
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain "${RUST_VERSION}" --default-host "${RUSTARCH}" && \
    rm rustup-init && \
    chmod -R a+w "${RUSTUP_HOME}" "${CARGO_HOME}"

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml README.md LICENSE /workspace/

# Copy the Cargo manifest, lockfile and Rust toolchain file
COPY Cargo.toml Cargo.lock rust-toolchain.toml /workspace/
# Set CARGO_BUILD_JOBS to 16 if it is not provided as a build argument.
# This prevents cargo from running $(nproc) jobs in parallel,
# which might exceed the limit on the number of open files.
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

# Directory that receives all cargo build output
ENV CARGO_TARGET_DIR=/workspace/target
# Build Rust
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch

# TODO: Tanmay Add LLMAPI-based feature flag once the engine is ready.
# Build the release artifacts against the committed Cargo.lock (--locked),
# generate documentation without dependency docs, then copy the built
# binaries into /usr/local/bin.
RUN cargo build --release --locked --features mistralrs,sglang,python && \
    cargo doc --no-deps && \
    cp target/release/dynamo-run \
       target/release/http \
       target/release/llmctl \
       target/release/metrics \
       target/release/mock_worker \
       /usr/local/bin/
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk

# Install uv, create virtualenv for general use, and build dynamo wheel
# NOTE(review): the uv image is referenced by the floating `latest` tag, so a
# rebuild may pick up a different uv release; consider pinning a version or
# digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# `source` is a bash builtin; it works in this shell-form RUN because /bin/sh
# is symlinked to bash earlier in this file.
# Steps: create the venv, activate it, build and install the runtime bindings
# wheel from lib/bindings/python, then build and install the top-level wheel
# from /workspace. Both wheels are written to /workspace/dist.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12 && \
    source /opt/dynamo/venv/bin/activate && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    cd /workspace && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Package the bindings
# Collect the runtime wheel, the C API shared library and the C headers under
# /opt/dynamo/bindings. Source paths are relative to WORKDIR (/workspace).
RUN mkdir -p /opt/dynamo/bindings/wheels /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo_runtime*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
# The runtime wheel is installed first, then the top-level wheel.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir dist/ai_dynamo_runtime*cp312*.whl && \
    pip install --no-cache-dir dist/ai_dynamo*any.whl

# Tell TRTLLM worker to use the Dynamo LLM C API for KV Cache Routing
ENV DYNAMO_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# Dynamo home directory; /workspace is the image's working directory.
ENV DYNAMO_HOME=/workspace

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
# Exec-form entrypoint with an empty default argument list.
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []