Dockerfile.tensorrt_llm 4.92 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
ARG BASE_IMAGE_TAG="krish-fix-trtllm-build.23766174"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

USER root

# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && dpkg -i nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1
ENV PATH=/usr/local/bin/etcd/:$PATH

# TODO: Try using uv to install tensorrtllm
ARG TENSORRTLLM_PIP_WHEEL_PATH=""
COPY ${TENSORRTLLM_PIP_WHEEL_PATH}/*.whl /tmp/
RUN find /tmp -name "*.whl" -exec pip install {} +

# Install genai-perf for benchmarking
# TODO: Move to tag when fix for genai-perf will be released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

# Rust build/dev dependencies
RUN apt-get update && \
    apt-get install --no-install-recommends --yes  gdb protobuf-compiler cmake libssl-dev pkg-config

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/

# Build Rust runtime
COPY lib/runtime /workspace/lib/runtime
RUN cd lib/runtime && \
    cargo build --release --locked && cargo doc --no-deps

# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

# TODO: Build dynamo-run
# COPY applications/...

COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk


# Generate C bindings. Note that this is required for TRTLLM backend re-build
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \
    cargo build --release --locked && cargo doc --no-deps

# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12 && \
    source /opt/dynamo/venv/bin/activate && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/dynamo*cp312*.whl && \
    cd /workspace/deploy/dynamo/sdk && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/dynamo_sdk*any.whl

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp lib/bindings/c/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN cd /opt/dynamo/bindings/wheels && \
    pip install dynamo*cp312*.whl  && \
    pip install /workspace/dist/dynamo_sdk*any.whl

# Tell TRTLLM worker to use the Dynamo LLM C API for KV Cache Routing
ENV DYNAMO_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []