# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

USER root

# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && dpkg -i nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
    uv venv /opt/triton/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-triton-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm

# Install genai-perf for benchmarking
ARG GENAI_PERF_TAG="r25.01"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
    apt install -y \
    build-essential \
    protobuf-compiler && \
    curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Working directory
WORKDIR /workspace

COPY runtime/rust /workspace/runtime/rust
RUN cd runtime/rust && \
    cargo build --release --locked && cargo doc --no-deps

# Build OpenAI HTTP Service binaries
COPY llm/rust /workspace/llm/rust
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

# TODO: Build tio
# COPY applications/...

# Generate C bindings for kv cache routing in vLLM
COPY llm/rust /workspace/llm/rust
RUN cd llm/rust/ && \
cargo build --release --locked && cargo doc --no-deps

# Build triton_distributed_rs wheel
COPY python-wheel /workspace/python-wheel
RUN cd python-wheel && \
    uv build && \
    uv pip install dist/triton_distributed_rs*cp312*.whl

# Package the bindings
RUN mkdir -p /opt/triton/llm_binding/wheels && mkdir /opt/triton/llm_binding/lib
RUN cp python-wheel/dist/triton_distributed_rs*cp312*.whl /opt/triton/llm_binding/wheels/.
RUN cp llm/rust/target/release/libtriton_llm_capi.so /opt/triton/llm_binding/lib/.
RUN cp -r llm/rust/libtriton-llm/include /opt/triton/llm_binding/.
# Tell vllm to use the Triton LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with triton-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

### Lean Runtime Image Stage ###

# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime

USER root

# Install tools for interactive convenience
RUN apt update -y && \
    apt install -y curl tmux vim && \
    echo "set -g mouse on" >> /root/.tmux.conf

# Set environment variables
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"

# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx

# Copy venv with installed packages
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
COPY python-wheel/python /workspace/python-wheel/python
# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}
# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm

WORKDIR /workspace

# FIXME: May want a modification with triton-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []