{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/dev.Dockerfile ===
# ======================================================================
# STAGE: dynamo_tools for developers
# ======================================================================
# Why this is a separate stage (not merged into `dev`):
# - `dev` is built FROM the framework `runtime` image. Installing lots of tooling with apt in that stage is slow and
#   makes rebuilds expensive when iterating on later dev layers.
# - Keeping tooling installation in `dynamo_tools` lets Docker cache the tools layer independently; `dev` can then
#   pull those binaries/configs in via COPY.
FROM runtime AS dynamo_tools

# Architecture identifiers supplied by the caller at build time (no defaults are declared here).
# ARCH is interpolated into the NVIDIA devtools apt repository URL further down in this stage.
# ARCH_ALT is declared but not referenced by the instructions visible in this stage.
ARG ARCH
ARG ARCH_ALT

# Keep apt/dpkg from prompting during the package installs in this stage.
ENV DEBIAN_FRONTEND=noninteractive
# Put /usr/local/bin at the front of the search path.
ENV PATH=/usr/local/bin:${PATH}

# The apt-based installs below need root. RUN steps in this stage execute under `bash -c`
# (note: no `-o pipefail` here, so pipelines only report the status of their last command).
USER root
SHELL ["/bin/bash", "-c"]

# NOTE: We intentionally disable the NVIDIA CUDA apt repo for this stage.
# The upstream runtime images may ship CUDA apt sources that occasionally go out of sync (mirror updates),
# causing apt-get update to fail with "File has unexpected size ... Mirror sync in progress".
# This stage only installs generic developer tools that are available from Ubuntu repos, so CUDA repos are unnecessary.
# Matching *.list files are moved to /tmp/apt-disabled and are not moved back by this step.
#
# `apt-get update` is retried up to 5 times with a linear backoff (5s, 10s, ...), wiping the partial package
# lists between attempts. If every attempt fails, the build stops with an explicit error instead of falling
# through to `apt-get install` with empty package lists (which would surface as a misleading
# "Unable to locate package" failure).
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    set -eux; \
    if [ -d /etc/apt/sources.list.d ]; then \
        mkdir -p /tmp/apt-disabled; \
        for f in /etc/apt/sources.list.d/*.list; do \
            [ -e "$f" ] || continue; \
            if grep -q "developer.download.nvidia.com/compute/cuda/repos" "$f"; then \
                mv "$f" "/tmp/apt-disabled/$(basename "$f")"; \
            fi; \
        done; \
    fi; \
    apt_updated=0; \
    for i in 1 2 3 4 5; do \
        if apt-get update -y; then \
            apt_updated=1; \
            break; \
        fi; \
        rm -rf /var/lib/apt/lists/*; \
        sleep $((i * 5)); \
    done; \
    if [ "$apt_updated" -ne 1 ]; then \
        echo "ERROR: apt-get update failed after 5 attempts" >&2; \
        exit 1; \
    fi; \
    apt-get install -y --no-install-recommends \
        # Core CLI utilities
        ca-certificates \
        curl \
        wget \
        git \
        git-lfs \
        less \
        grep \
        sed \
        # Editors / shells
        vim \
        nano \
        htop \
        tmux \
        screen \
        zsh \
        fish \
        bash-completion \
        # Networking / transfers
        net-tools \
        openssh-client \
        iproute2 \
        iputils-ping \
        zip \
        unzip \
        rsync \
        # Build toolchain
        build-essential \
        cmake \
        autoconf \
        automake \
        libtool \
        meson \
        ninja-build \
        pybind11-dev \
        pkg-config \
        protobuf-compiler \
        # Debugging / tracing
        gdb \
        valgrind \
        strace \
        ltrace \
        # JSON/YAML + filesystem helpers
        jq \
        yq \
        tree \
        fd-find \
        ripgrep \
        # Privilege escalation + crypto tooling
        sudo \
        gnupg2 \
        gnupg1 \
        # GPU / perf helpers
        nvtop \
        # Python
        python3 \
        python3-pip \
        python3-venv \
        # Native deps for Python/Rust wheels
        patchelf \
        clang \
        libclang-dev && \
    rm -rf /var/lib/apt/lists/* && \
    # Initialize Git LFS for the user running this step (root in this stage).
    git lfs install

# awk is provided by several interchangeable packages, so it is installed in its own fault-tolerant step:
# candidates are tried in order (gawk, mawk, original-awk) and the first one that installs wins.
# If none can be installed, a warning is printed and the build continues; only a failing
# `apt-get update` (or list cleanup) fails this step. The final check just reports whether awk is on PATH.
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && \
    { \
        installed=""; \
        for pkg in gawk mawk original-awk; do \
            if apt-get install -y --no-install-recommends "$pkg"; then \
                installed="$pkg"; \
                break; \
            fi; \
        done; \
        [ -n "$installed" ] || echo "Warning: Could not install any awk implementation"; \
    } && \
    rm -rf /var/lib/apt/lists/* && \
    if command -v awk >/dev/null 2>&1; then \
        echo "awk available: $(command -v awk)"; \
    else \
        echo "awk not available"; \
    fi

# Add NVIDIA devtools repository and install development tools (nsight-systems).
# - `set -o pipefail`: this stage's SHELL is plain `bash -c`, so without it a failed key download would be
#   masked by the exit status of the command on the right-hand side of the pipe.
# - /etc/apt/keyrings is created explicitly so the step does not depend on the base image shipping it.
# - The repository is pinned to the dearmored key via `signed-by`.
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    mkdir -p /etc/apt/keyrings && \
    wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" \
        | gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" \
        | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends nsight-systems-2025.5.1 && \
    rm -rf /var/lib/apt/lists/*

# ======================================================================
# TARGET: dev (root-based development)
# ======================================================================
FROM runtime AS dev

# FRAMEWORK selects the framework-specific branches in later RUN steps of this stage
# (the visible checks compare it against "sglang" and "none"). No default is declared here.
ARG FRAMEWORK

# The build steps of this stage run as root.
USER root

# PYTHON_VERSION is interpolated into the python${PYTHON_VERSION} dist-packages/site-packages paths used
# when the SGLang venv is populated later in this stage. No default is declared here.
ARG PYTHON_VERSION

# Ensure the runtime stage always has /usr/bin/python3.
# - vLLM/TRTLLM runtime images may only have Python in /opt/dynamo/venv/bin/{python,python3}
# - SGLang runtime images typically have /usr/bin/python3 already
# - framework=none runtime stage now installs /usr/bin/python3
#
# Candidates are tried in order: the venv's python3, the venv's python, then whatever `python3` / `python`
# resolves to on PATH. The build fails if none is found.
# `ln -sf` is used because the `-e` test follows symlinks: a dangling /usr/bin/python3 symlink passes the
# "does not exist" check but would make a plain `ln -s` fail with "File exists".
# The `command -v` results are quoted so a path containing whitespace cannot split into several arguments.
RUN if [ ! -e /usr/bin/python3 ]; then \
        if [ -x /opt/dynamo/venv/bin/python3 ]; then \
            ln -sf /opt/dynamo/venv/bin/python3 /usr/bin/python3; \
        elif [ -x /opt/dynamo/venv/bin/python ]; then \
            ln -sf /opt/dynamo/venv/bin/python /usr/bin/python3; \
        elif command -v python3 >/dev/null 2>&1; then \
            ln -sf "$(command -v python3)" /usr/bin/python3; \
        elif command -v python >/dev/null 2>&1; then \
            ln -sf "$(command -v python)" /usr/bin/python3; \
        else \
            echo "ERROR: Could not find Python to symlink to /usr/bin/python3" >&2; \
            exit 1; \
        fi; \
    fi

# Bring the UCX / NIXL / libfabric / gdrcopy artifacts into the dev image for source builds.
# - Building from source (cargo build, maturin develop) links against -lnixl, -lnixl_build and -lnixl_common;
#   the runtime stage only consumes pre-built wheels and therefore does not need these files.
# - SGLang: the upstream lmsysorg/sglang runtime lacks NIXL, so the files are taken from the bind-mounted
#   `wheel_builder` stage. Nothing is copied unless both the UCX and NIXL directories exist there.
# - vllm/trtllm/none: the runtime image already carries NIXL/UCX, so this step is a no-op.
ARG ARCH_ALT
RUN --mount=from=wheel_builder,target=/wheel_builder \
    if [ "${FRAMEWORK}" = "sglang" ] \
        && [ -d /wheel_builder/usr/local/ucx ] \
        && [ -d /wheel_builder/opt/nvidia/nvda_nixl ]; then \
        mkdir -p /opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d; \
        cp -r /wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/; \
        cp -r /wheel_builder/usr/local/ucx /usr/local/; \
        cp -r /wheel_builder/usr/local/libfabric /usr/local/; \
        cp /wheel_builder/usr/include/gdrapi.h /usr/include/; \
        cp /wheel_builder/usr/lib64/libgdrapi.so* /usr/lib64/; \
        # Register /usr/lib64 (where libgdrapi was just placed) with the dynamic linker configuration.
        echo "/usr/lib64" >> /etc/ld.so.conf.d/gdrcopy.conf; \
        # Mirror lib64 into the ARCH-qualified lib/${ARCH_ALT}-linux-gnu directory that SGLang expects.
        if [ -d /opt/nvidia/nvda_nixl/lib64 ]; then \
            mkdir -p /opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu; \
            cp -r /opt/nvidia/nvda_nixl/lib64/. /opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/; \
        fi; \
    fi

# All frameworks use the same path pattern: /opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm)
# For sglang: This sets them for the first time (required)
# ${ARCH_ALT} is expanded at build time from the ARG declared earlier in this stage; if it is not passed
# to the build, the resulting paths contain an empty architecture component.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins

# Set universal CUDA development environment variables (all frameworks)
# vLLM: Dockerfile.vllm line 533, 597
# TRT-LLM: Dockerfile.trtllm lines 600-606
# NOTE(review): the line references above point into other files and may drift; verify before relying on them.
# Every path below is rooted at /usr/local/cuda: headers for CPATH and the TRITON_*_PATH include settings,
# and the cuobjdump / nvdisasm / ptxas binaries under /usr/local/cuda/bin.
ENV CUDA_HOME=/usr/local/cuda \
    CPATH=/usr/local/cuda/include \
    CUDA_DEVICE_ORDER=PCI_BUS_ID \
    TRITON_CUPTI_PATH=/usr/local/cuda/include \
    TRITON_CUDACRT_PATH=/usr/local/cuda/include \
    TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \
    TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
    TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
    TRITON_CUDART_PATH=/usr/local/cuda/include \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility

# Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
#
# The value inherited from the base image is appended only when it is non-empty. Appending it
# unconditionally would leave a trailing ":" when the base image does not set LD_LIBRARY_PATH, and an
# empty entry makes the dynamic loader search the current working directory.
# PYTHON_VERSION is already declared earlier in this stage; the redeclaration is kept as a harmless no-op.
ARG PYTHON_VERSION
ENV LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/usr/local/cuda/compat/lib.real\
${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# Copy shell profile script for framework-specific environment variables
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
COPY --chmod=755 container/dev/50-framework-paths.sh /etc/profile.d/50-framework-paths.sh

# Set umask for group-writable files in dev stage (runs as root)
# The `mkdir -p` is defensive: the COPY above already creates /etc/profile.d if it was missing.
RUN mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
# From here on, RUN steps execute in a login bash with pipefail enabled. The login shell is what lets the
# profile scripts written above take effect during the build (presumably via /etc/profile sourcing
# /etc/profile.d/*.sh in the base image; verify against the runtime image).
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]

# Developer tools are installed in the dynamo_tools layer and copied into the runtime-based dev image.
# This keeps dev builds fast and avoids apt-get in runtime-derived stages.
#
# IMPORTANT: Do not clobber runtime /usr/bin/python3 (SGLang depends on system python3 being present).
# We stash the pre-tools python3 (which may be a real binary or a symlink we created earlier for vLLM/TRTLLM)
# and restore it after copying toolchains from dynamo_tools.
# `cp -a` copies a symlink as a symlink rather than copying the file it points to.
RUN if [ -e /usr/bin/python3 ]; then cp -a /usr/bin/python3 /tmp/python3.pretools; fi
COPY --from=dynamo_tools /usr/bin/ /usr/bin/
COPY --from=dynamo_tools /usr/sbin/ /usr/sbin/
COPY --from=dynamo_tools /usr/lib/ /usr/lib/
COPY --from=dynamo_tools /usr/libexec/ /usr/libexec/
COPY --from=dynamo_tools /lib/ /lib/
COPY --from=dynamo_tools /usr/share/ /usr/share/
COPY --from=dynamo_tools /etc/alternatives/ /etc/alternatives/
COPY --from=dynamo_tools /etc/bash_completion.d/ /etc/bash_completion.d/
COPY --from=dynamo_tools /etc/sudoers /etc/sudoers
COPY --from=dynamo_tools /etc/sudoers.d/ /etc/sudoers.d/
COPY --from=dynamo_tools /opt/nvidia/ /opt/nvidia/

# Restore the pre-tools python3 (keeps SGLang system python intact and avoids venv symlink loops).
# - The `-L` test matters when the stashed python3 is a *relative* symlink (e.g. python3 -> python3.12):
#   that link dangles while it sits in /tmp, so `-e` alone reports it as missing and the restore would be
#   silently skipped.
# - The stash is removed after a successful restore so it does not linger in the final filesystem.
RUN if [ -e /tmp/python3.pretools ] || [ -L /tmp/python3.pretools ]; then \
        cp -af /tmp/python3.pretools /usr/bin/python3 && \
        rm -f /tmp/python3.pretools; \
    fi

ARG WORKSPACE_DIR=/workspace

# Dev environment variables (aligned with framework dev stages)
# Framework-specific PATH additions are handled in /etc/profile.d/50-framework-paths.sh
# - WORKSPACE_DIR / DYNAMO_HOME both resolve to the build arg above (default /workspace).
# - RUSTUP_HOME / CARGO_HOME point at the toolchain directories copied from `wheel_builder` below.
# - PATH puts the venv and cargo bin directories ahead of the inherited PATH.
# NOTE(review): CARGO_TARGET_DIR is hard-coded to /workspace/target and does not follow WORKSPACE_DIR when
# that arg is overridden; confirm whether this is intentional.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/opt/dynamo/venv/bin:/usr/local/cargo/bin:$PATH

# Copy Rust/Cargo/Maturin from the concatenated framework stages.
# - Rust/Cargo: from `wheel_builder` (already installed there)
# - maturin: from `wheel_builder` venv (installed there via uv pip)
# Everything is copied as owner `dynamo`, group 0, with mode 775, so the toolchain directories stay writable
# for both the owner and the root group. The `dynamo` user is expected to already exist in the runtime image.
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /workspace/.venv/bin/maturin /usr/local/bin/maturin

# Provide an `uv` binary for SGLang venv creation below.
# The binary is staged at /tmp/uv-binary; the SGLang branch of the next RUN step copies it into the venv.
# NOTE(review): the image is referenced by the floating `:latest` tag, so the uv version can change between
# builds; consider pinning a specific version tag or digest for reproducibility.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /tmp/uv-binary

# Create venv for SGLang (vLLM/TensorRT-LLM/framework=none already have /opt/dynamo/venv from runtime)
# - SGLang: Use --system-site-packages to inherit runtime packages, then copy user site-packages
# - framework=none: Runtime already has venv with dynamo packages installed; a plain venv is created only
#   if /opt/dynamo/venv is missing.
# Note: umask 002 from login shell ensures files are group-writable
#
# Error handling in the SGLang branch:
# - The steps form a single `&&` chain, so a failure creating the venv stops the build right there instead of
#   being ignored and surfacing later as a confusing follow-on error.
# - The dist-packages copy stays best-effort (the venv already sees system packages through
#   --system-site-packages), but a failure is now reported on stderr instead of being dropped silently.
RUN if [ "${FRAMEWORK}" = "sglang" ]; then \
        mkdir -p /opt/dynamo/venv && \
        python3 -m venv --system-site-packages /opt/dynamo/venv && \
        # Copy all packages from runtime stage system site-packages into venv
        # This includes ai-dynamo-runtime, kubernetes, and all other dependencies
        # Use --no-preserve=mode so copied files inherit umask 002 (group-writable)
        { cp -r --no-preserve=mode /usr/local/lib/python${PYTHON_VERSION}/dist-packages/* \
              /opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/ || \
          echo "WARNING: could not copy all runtime dist-packages into /opt/dynamo/venv" >&2; } && \
        # Ensure `uv` is available on PATH for subsequent `uv pip ...` steps.
        cp /tmp/uv-binary /opt/dynamo/venv/bin/uv && \
        chmod +x /opt/dynamo/venv/bin/uv && \
        # Install maturin (with the patchelf extra) using whichever `pip` is first on PATH, so wheels can be
        # built/repaired when needed.
        pip install --ignore-installed maturin[patchelf]; \
    elif [ "${FRAMEWORK}" = "none" ] && [ ! -d /opt/dynamo/venv ]; then \
        mkdir -p /opt/dynamo && \
        python3 -m venv /opt/dynamo/venv; \
    fi

# Initialize Git LFS (required for requirements with lfs=true).
# This runs as the stage's current build user, which is root at this point (see `USER root` above), so the
# LFS configuration is written for that user.
# NOTE(review): if the `dynamo` user also needs LFS configured, confirm that is handled elsewhere.
RUN git lfs install

# Install common and test dependencies (matches main Dockerfile dev stage)
# This installs pytest-benchmark and other test dependencies required for CI
# - Both requirements files are bind-mounted from the build context instead of being copied into a layer.
# - Packages are resolved with `--index-strategy unsafe-best-match` across the default index and the
#   PyTorch cu130 extra index.
# - SGLang specific: Reinstall pytest to ensure venv has pytest executable with correct shebang
# NOTE(review): an earlier version of this comment mentioned a TRT-LLM-specific cupy-cuda13x install
# (Dockerfile.trtllm lines 768-776); no such step is present in this instruction, so confirm where it lives.
ARG FRAMEWORK
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    # Cache uv downloads; uv handles its own locking for this cache.
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv pip install \
        --index-strategy unsafe-best-match \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt && \
    if [ "${FRAMEWORK}" = "sglang" ]; then \
        uv pip install --force-reinstall --no-deps pytest; \
    fi

# Copy entire workspace (old design - simpler for CI)
# .dockerignore filters out unwanted files (.git, build artifacts, etc.)
# Everything is copied as owner `dynamo`, group 0, with mode 775 applied to the copied content.
# Because the whole build context is copied, any source change invalidates this layer and all layers after it.
WORKDIR ${WORKSPACE_DIR}
COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/

# Make the workspace directory itself group-writable.
RUN chmod g+w ${WORKSPACE_DIR}

# Install benchmarks package (includes prefix_data_generator, tabulate, etc.)
# The package is installed non-editable from ${WORKSPACE_DIR}/benchmarks with uv, reusing the uv download
# cache mount and the same UV_* timeout/retry settings as the requirements step.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd ${WORKSPACE_DIR}/benchmarks && \
    export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv pip install .

# Install maturin and create editable install entry points.
#
# Why the `uv` check:
# - This dev stage uses `uv` earlier (requirements + benchmarks). For SGLang, we also install an `uv` binary into
#   /opt/dynamo/venv/bin and put that venv on PATH, so `uv` is expected to be available here in normal builds.
# - The `command -v uv` guard is defensive: if `uv` ever disappears from PATH (e.g. on SGLang), we fall back to
#   `python3 -m pip` so the editable install can still proceed (instead of failing mid-layer with a confusing error).
#
# Error handling:
# - A missing pyproject.toml aborts the build with an explicit error.
# - Only the final cache `chmod` is best-effort. It is wrapped in its own command group so that `|| true`
#   applies to the chmod alone; without the grouping, `|| true` binds to the whole `&&` chain and a failed
#   maturin or editable install would be reported as success.
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 PIP_CACHE_DIR=/root/.cache/pip && \
    if [ -f pyproject.toml ]; then \
        if command -v uv >/dev/null 2>&1; then \
            uv pip install maturin[patchelf] && uv pip install --no-deps -e . ; \
        else \
            python3 -m pip install maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
        fi; \
    else \
        echo "ERROR: pyproject.toml not found in ${WORKSPACE_DIR}; expected to build from the Dynamo repo root." >&2; \
        exit 1; \
    fi && \
    { chmod -R g+w /root/.cache /home/dynamo/.cache 2>/dev/null || true; }

# Set commit SHA for tests (passed via docker build as --build-arg)
# Declared at the end of the stage so that a changing SHA does not invalidate the layers above.
# If the build arg is not supplied, DYNAMO_COMMIT_SHA is set to an empty string.
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

# Exec-form entrypoint with an empty default command: arguments given at `docker run` are passed to the
# entrypoint script. No USER instruction follows `USER root` in this stage, so the image runs as root
# (consistent with the "root-based development" target).
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []