dynamo_runtime.Dockerfile 7.53 KB
Newer Older
1
2
3
4
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
5
# === BEGIN templates/dynamo_runtime.Dockerfile ===
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#######################################
########## Runtime image ##############
#######################################

FROM dynamo_base AS runtime

ARG ARCH_ALT
ARG PYTHON_VERSION

# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
    CARGO_TARGET_DIR=/opt/dynamo/target

33
34
35
36
37
38
39
ENV LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
${LD_LIBRARY_PATH}

40
41
42
43
44
45
46
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

47
# Always copy FFmpeg so libs are available for Rust checks in CI
48
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
49
50
51
52
53
    mkdir -p /usr/local/lib/pkgconfig && \
    cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
    cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
54

55
56
{% if target not in ("dev", "local-dev") %}
# Copy built artifacts (not needed for dev/local-dev; users build from source)
57
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
58
{% endif %}
59
60
61
62
63
64
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/

# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
# This is needed to create venv and install dynamo packages
ARG PYTHON_VERSION
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
65
# Clear partial downloads first to avoid stale rename failures from prior interrupted builds.
66
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
67
    rm -rf /var/cache/apt/archives/partial/* && \
68
69
70
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION}-dev \
71
72
73
74
75
76
77
78
79
80
        python${PYTHON_VERSION}-venv \
        build-essential \
        cmake \
        protobuf-compiler \
        pkg-config \
        clang \
        libclang-dev \
        patchelf \
        git \
        git-lfs && \
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
    rm -rf /var/lib/apt/lists/* && \
    ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Switch to dynamo user and create virtual environment
USER dynamo
ENV HOME=/home/dynamo

# Create and activate virtual environment
# Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Cache uv downloads; uv handles its own locking for the cache.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

99
{% if target not in ("dev", "local-dev") %}
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Install dynamo wheels (runtime packages only, no test dependencies)
# uv handles its own locking for the cache, no need to add sharing=locked
ARG ENABLE_KVBM
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
    uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -z "$KVBM_WHEEL" ]; then \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
        uv pip install "$KVBM_WHEEL"; \
    fi
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel only (pre-built C++ binary, not buildable from source).
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
    uv pip install /opt/dynamo/wheelhouse/nixl/nixl*.whl
{% endif %}

# Install gpu_memory_service wheel if enabled (all targets)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
        export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then uv pip install "$GMS_WHEEL"; fi; \
    fi
133

134
135
136
# Initialize Git LFS (required for git+https dependencies with LFS artifacts)
RUN git lfs install

137
138
139
140
141
142
143
# Install runtime dependencies (common + planner + frontend).
# Frontend deps (tritonclient + grpcio/protobuf pins) are installed here so the resolver
# sees all constraints in one pass, avoiding grpcio downgrades in the test layer.
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
    --mount=type=bind,source=./container/deps/requirements.planner.txt,target=/tmp/requirements.planner.txt \
    --mount=type=bind,source=./container/deps/requirements.frontend.txt,target=/tmp/requirements.frontend.txt \
144
145
146
147
148
    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv pip install \
        --index-strategy unsafe-best-match \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
149
150
151
        --requirement /tmp/requirements.common.txt \
        --requirement /tmp/requirements.planner.txt \
        --requirement /tmp/requirements.frontend.txt
152

153
# TODO: skip /workspace COPY for dev/local-dev (bind-mounted from host, gets shadowed)
154
155
156
157
158
159
# Copy workspace source code
ARG WORKSPACE_DIR=/workspace
WORKDIR ${WORKSPACE_DIR}
COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
RUN chmod g+w ${WORKSPACE_DIR}

160
161
162
163
164
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []