"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "c798593f0d88cec583c599ea7ea40a2cc26c312b"
Unverified Commit dab2de67 authored by Anant Sharma's avatar Anant Sharma Committed by GitHub
Browse files

build: reorder docker layers to maximize layer cache hits (#4468)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
Signed-off-by: default avatarHarrison Saturley-Hall <hsaturleyhal@nvidia.com>
Co-authored-by: default avatarHarrison Saturley-Hall <hsaturleyhal@nvidia.com>
parent db5687f5
...@@ -45,45 +45,30 @@ ARG NIXL_GDRCOPY_REF=v2.5.1 ...@@ -45,45 +45,30 @@ ARG NIXL_GDRCOPY_REF=v2.5.1
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARGs for this stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG PYTHON_VERSION
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
USER root USER root
WORKDIR /opt/dynamo WORKDIR /opt/dynamo
##################################
########## Tool Installation #####
##################################
# Install uv package manager # Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install SCCACHE if requested # Install NATS server
COPY container/use-sccache.sh /tmp/use-sccache.sh ENV NATS_VERSION="v2.10.28"
RUN if [ "$USE_SCCACHE" = "true" ]; then \ RUN --mount=type=cache,target=/var/cache/apt \
/tmp/use-sccache.sh install; \ wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
fi dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
################################## # Install etcd
########## Rust Setup ############ ENV ETCD_VERSION="v3.5.21"
################################## RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
# Rust Setup
# Rust environment setup # Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \ ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \ CARGO_HOME=/usr/local/cargo \
...@@ -100,24 +85,6 @@ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1. ...@@ -100,24 +85,6 @@ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.
rm rustup-init && \ rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME chmod -R a+w $RUSTUP_HOME $CARGO_HOME
##################################
########## External Services #####
##################################
# Install NATS server
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
# Install etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
################################## ##################################
##### Wheel Build Image ########## ##### Wheel Build Image ##########
...@@ -132,17 +99,23 @@ FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder ...@@ -132,17 +99,23 @@ FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_JOBS
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
WORKDIR /workspace WORKDIR /workspace
# Copy CUDA from base stage
COPY --from=base /usr/local/cuda /usr/local/cuda
COPY --from=base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
# Set environment variables first so they can be used in COPY commands
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
PATH=/usr/local/cargo/bin:$PATH
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
# Install system dependencies # Install system dependencies
RUN yum groupinstall -y 'Development Tools' && \ RUN yum groupinstall -y 'Development Tools' && \
dnf install -y almalinux-release-synergy && \ dnf install -y almalinux-release-synergy && \
...@@ -187,42 +160,21 @@ RUN set -eux; \ ...@@ -187,42 +160,21 @@ RUN set -eux; \
# Point build tools explicitly at the modern protoc # Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc ENV PROTOC=/usr/local/bin/protoc
# Set environment variables first so they can be used in COPY commands
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
PATH=/usr/local/cargo/bin:$PATH
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
# Install SCCACHE if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Copy CUDA from base stage
COPY --from=base /usr/local/cuda /usr/local/cuda
COPY --from=base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
ENV CUDA_PATH=/usr/local/cuda \ ENV CUDA_PATH=/usr/local/cuda \
PATH=/usr/local/cuda/bin:$PATH \ PATH=/usr/local/cuda/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \ LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Create virtual environment for building wheels # Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \ RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
# Build and install gdrcopy # Build and install gdrcopy
RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \ RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
cd gdrcopy/packages && \ cd gdrcopy/packages && \
...@@ -231,6 +183,20 @@ RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/g ...@@ -231,6 +183,20 @@ RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/g
rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \ rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
# Install SCCACHE if requested
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Build and install UCX # Build and install UCX
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \ --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
...@@ -304,6 +270,7 @@ COPY lib/ /opt/dynamo/lib/ ...@@ -304,6 +270,7 @@ COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/ COPY components/ /opt/dynamo/components/
# Build dynamo wheels # Build dynamo wheels
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \ --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \ export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
...@@ -326,21 +293,45 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -326,21 +293,45 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
/tmp/use-sccache.sh show-stats "Dynamo" /tmp/use-sccache.sh show-stats "Dynamo"
############################################## ##############################################
########## Dev entrypoint image ############## ########## Runtime image ##############
############################################## ##############################################
FROM base AS dev FROM base AS runtime
ARG ENABLE_KVBM
ARG ARCH_ALT ARG ARCH_ALT
# Application environment variables # Create dynamo user with group 0 for OpenShift compatibility
ENV DYNAMO_HOME=/opt/dynamo \ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
CARGO_TARGET_DIR=/opt/dynamo/target CARGO_TARGET_DIR=/opt/dynamo/target
ARG DYNAMO_COMMIT_SHA # Copy ucx and nixl libs
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
##############################################
########## Dev entrypoint image ##############
##############################################
FROM runtime AS dev
# Application environment variables
RUN apt-get update -y \ RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
# required for AIC perf files # required for AIC perf files
...@@ -358,17 +349,12 @@ RUN apt-get update -y \ ...@@ -358,17 +349,12 @@ RUN apt-get update -y \
&& echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \ && echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \
&& chmod 0440 /etc/sudoers.d/dynamo && chmod 0440 /etc/sudoers.d/dynamo
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# Switch to dynamo user # Switch to dynamo user
USER dynamo USER dynamo
ENV HOME=/home/dynamo ENV HOME=/home/dynamo \
DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Create and activate virtual environment # Create and activate virtual environment
ARG PYTHON_VERSION ARG PYTHON_VERSION
...@@ -385,26 +371,9 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi ...@@ -385,26 +371,9 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.test.txt
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY --chown=dynamo: ./ /workspace/ COPY --chown=dynamo: ./ /workspace/
ARG ENABLE_KVBM
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...@@ -425,6 +394,9 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \ ...@@ -425,6 +394,9 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \ echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
USER dynamo USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
......
...@@ -333,10 +333,6 @@ ${NIXL_PLUGIN_DIR}:\ ...@@ -333,10 +333,6 @@ ${NIXL_PLUGIN_DIR}:\
/usr/local/nvidia/lib64:\ /usr/local/nvidia/lib64:\
${LD_LIBRARY_PATH} ${LD_LIBRARY_PATH}
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# Copy NATS and ETCD from dynamo_base, and UCX/NIXL # Copy NATS and ETCD from dynamo_base, and UCX/NIXL
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
...@@ -387,6 +383,9 @@ COPY --chown=dynamo: deploy /workspace/deploy ...@@ -387,6 +383,9 @@ COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/ COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: recipes/ /workspace/recipes/ COPY --chown=dynamo: recipes/ /workspace/recipes/
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -408,9 +407,6 @@ FROM runtime AS dev ...@@ -408,9 +407,6 @@ FROM runtime AS dev
ARG WORKSPACE_DIR=/sgl-workspace/dynamo ARG WORKSPACE_DIR=/sgl-workspace/dynamo
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# NOTE: SGLang uses system Python (not a virtualenv in framework/runtime stages) to align with # NOTE: SGLang uses system Python (not a virtualenv in framework/runtime stages) to align with
# upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile # upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile
# For dev stage, we create a lightweight venv with --system-site-packages to satisfy maturin develop # For dev stage, we create a lightweight venv with --system-site-packages to satisfy maturin develop
......
...@@ -57,15 +57,10 @@ FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base ...@@ -57,15 +57,10 @@ FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
ARG ARCH_ALT ARG ARCH_ALT
ARG PYTHON_VERSION COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
ARG ENABLE_KVBM
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install minimal dependencies needed for TensorRT-LLM installation # Install minimal dependencies needed for TensorRT-LLM installation
ARG PYTHON_VERSION
RUN apt-get update && \ RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \ python${PYTHON_VERSION}-dev \
...@@ -77,13 +72,13 @@ RUN apt-get update && \ ...@@ -77,13 +72,13 @@ RUN apt-get update && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Copy uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Create virtual environment # Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \ RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION uv venv /opt/dynamo/venv --python $PYTHON_VERSION
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
# Copy pytorch installation from NGC PyTorch # Copy pytorch installation from NGC PyTorch
ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10 ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10
ARG TORCH_TENSORRT_VER=2.9.0a0 ARG TORCH_TENSORRT_VER=2.9.0a0
...@@ -187,23 +182,67 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ ...@@ -187,23 +182,67 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
ARG ARCH_ALT ARG ARCH_ALT
ARG ENABLE_KVBM
ARG PYTHON_VERSION
WORKDIR /workspace WORKDIR /workspace
ENV ENV=${ENV:-/etc/shinit_v2} ENV ENV=${ENV:-/etc/shinit_v2}
ENV VIRTUAL_ENV=/opt/dynamo/venv ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# workaround for pickle lib issue # workaround for pickle lib issue
ENV OMPI_MCA_coll_ucc_enable=0 ENV OMPI_MCA_coll_ucc_enable=0
ARG DYNAMO_COMMIT_SHA # Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
COPY --from=pytorch_base /usr/local/cuda/bin/cuobjdump /usr/local/cuda/bin/cuobjdump
COPY --from=pytorch_base /usr/local/cuda/bin/nvdisasm /usr/local/cuda/bin/nvdisasm
ENV CUDA_HOME=/usr/local/cuda \
TRITON_CUPTI_PATH=/usr/local/cuda/include \
TRITON_CUDACRT_PATH=/usr/local/cuda/include \
TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \
TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
TRITON_CUDART_PATH=/usr/local/cuda/include
# Copy OpenMPI from PyTorch base image
COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from PyTorch base image
COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=pytorch_base /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy uv to system /bin
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
# Install Python, build-essential and python3-dev as apt dependencies # Install Python, build-essential and python3-dev as apt dependencies
ARG PYTHON_VERSION
RUN if [ ${ARCH_ALT} = "x86_64" ]; then \ RUN if [ ${ARCH_ALT} = "x86_64" ]; then \
ARCH_FOR_GPG=${ARCH_ALT}; \ ARCH_FOR_GPG=${ARCH_ALT}; \
else \ else \
...@@ -249,67 +288,20 @@ RUN if [ ${ARCH_ALT} = "x86_64" ]; then \ ...@@ -249,67 +288,20 @@ RUN if [ ${ARCH_ALT} = "x86_64" ]; then \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
ENV LD_LIBRARY_PATH="/usr/lib/${ARCH_ALT}-linux-gnu/nvshmem/13/:${LD_LIBRARY_PATH}" # Switch to dynamo user
USER dynamo
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image ENV HOME=/home/dynamo
COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc ENV DYNAMO_HOME=/workspace
COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++ ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
COPY --from=pytorch_base /usr/local/cuda/bin/cuobjdump /usr/local/cuda/bin/cuobjdump
COPY --from=pytorch_base /usr/local/cuda/bin/nvdisasm /usr/local/cuda/bin/nvdisasm
ENV CUDA_HOME=/usr/local/cuda \
TRITON_CUPTI_PATH=/usr/local/cuda/include \
TRITON_CUDACRT_PATH=/usr/local/cuda/include \
TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \
TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
TRITON_CUDART_PATH=/usr/local/cuda/include
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy OpenMPI from PyTorch base image
COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from PyTorch base image
COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=pytorch_base /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy uv to system /bin
COPY --from=framework /bin/uv /bin/uvx /bin/
# Copy libgomp.so from framework image # Copy libgomp.so from framework image
COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/ COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Create dynamo user with group 0 for OpenShift compatibility # Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage
RUN userdel -r ubuntu > /dev/null 2>&1 || true \ COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
ENV DYNAMO_HOME=/workspace
# Copy UCX from framework image as plugin for NIXL # Copy UCX from framework image as plugin for NIXL
# Copy NIXL source from framework image # Copy NIXL source from framework image
...@@ -317,6 +309,7 @@ ENV DYNAMO_HOME=/workspace ...@@ -317,6 +309,7 @@ ENV DYNAMO_HOME=/workspace
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH" ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\ $NIXL_LIB_DIR:\
...@@ -324,17 +317,18 @@ $NIXL_PLUGIN_DIR:\ ...@@ -324,17 +317,18 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\ /usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\ /opt/hpcx/ompi/lib:\
/usr/lib/${ARCH_ALT}-linux-gnu/nvshmem/13/:\
$TENSORRT_LIB_DIR:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib:\
$LD_LIBRARY_PATH $LD_LIBRARY_PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi ENV OPAL_PREFIX=/opt/hpcx/ompi
# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV} COPY --chown=dynamo: benchmarks/ /workspace/benchmarks/
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV LD_LIBRARY_PATH=/opt/dynamo/venv/lib/python3.12/site-packages/torch/lib:/opt/dynamo/venv/lib/python3.12/site-packages/torch_tensorrt/lib:${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
# Install dynamo, NIXL, and dynamo-specific dependencies # Install dynamo, NIXL, and dynamo-specific dependencies
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/ ARG ENABLE_KVBM
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \ RUN uv pip install \
--no-cache \ --no-cache \
...@@ -344,10 +338,8 @@ RUN uv pip install \ ...@@ -344,10 +338,8 @@ RUN uv pip install \
&& if [ "${ENABLE_KVBM}" = "true" ]; then \ && if [ "${ENABLE_KVBM}" = "true" ]; then \
uv pip install --no-cache /opt/dynamo/wheelhouse/kvbm*.whl; \ uv pip install --no-cache /opt/dynamo/wheelhouse/kvbm*.whl; \
fi \ fi \
&& cd /opt/dynamo/benchmarks \ && cd /workspace/benchmarks \
&& UV_GIT_LFS=1 uv pip install --no-cache . \ && UV_GIT_LFS=1 uv pip install --no-cache .
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies # Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
...@@ -360,17 +352,13 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi ...@@ -360,17 +352,13 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.test.txt \ --requirement /tmp/requirements.test.txt \
cupy-cuda13x cupy-cuda13x
# Copy tests, benchmarks, deploy and components for CI with correct ownership # Copy tests, benchmarks, deploy and components for CI
COPY --chown=dynamo: tests /workspace/tests COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/ COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: recipes/ /workspace/recipes/ COPY --chown=dynamo: recipes/ /workspace/recipes/
# Copy attribution files with correct ownership
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Setup launch banner in common directory accessible to all users # Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
...@@ -382,6 +370,9 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \ ...@@ -382,6 +370,9 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo USER dynamo
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
......
...@@ -46,6 +46,9 @@ ARG ARCH_ALT=x86_64 ...@@ -46,6 +46,9 @@ ARG ARCH_ALT=x86_64
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Copy cuda tools and libs from base image
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
######################################################## ########################################################
########## Framework Development Image ################ ########## Framework Development Image ################
######################################################## ########################################################
...@@ -67,6 +70,8 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base ...@@ -67,6 +70,8 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Use dynamo base image (see /container/Dockerfile for more details) # Use dynamo base image (see /container/Dockerfile for more details)
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
ARG PYTHON_VERSION ARG PYTHON_VERSION
RUN apt-get update -y \ RUN apt-get update -y \
...@@ -90,9 +95,6 @@ RUN apt-get update -y \ ...@@ -90,9 +95,6 @@ RUN apt-get update -y \
# generic dev name .so so we symlink .s0.1 -> .so # generic dev name .so so we symlink .s0.1 -> .so
RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so || true RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so || true
### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Create virtual environment # Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \ RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION uv venv /opt/dynamo/venv --python $PYTHON_VERSION
...@@ -178,15 +180,41 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" ...@@ -178,15 +180,41 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# This fixes NVML InvalidArgument errors when CUDA_VISIBLE_DEVICES is set # This fixes NVML InvalidArgument errors when CUDA_VISIBLE_DEVICES is set
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
COPY --from=base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
RUN ln -s /usr/local/cuda/lib64/libcublas.so.12 /usr/local/cuda/lib64/libcublas.so
RUN ln -s /usr/local/cuda/lib64/libcublasLt.so.12 /usr/local/cuda/lib64/libcublasLt.so
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for complilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
### COPY NATS & ETCD ###
# Copy nats and etcd from dev image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy uv to system /bin
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
ARG ARCH_ALT ARG ARCH_ALT
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG ENABLE_KVBM
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# Install Python, build-essential and python3-dev as apt dependencies # Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \ RUN apt-get update && \
...@@ -211,41 +239,18 @@ RUN apt-get update && \ ...@@ -211,41 +239,18 @@ RUN apt-get update && \
cuda-command-line-tools-12-8 && \ cuda-command-line-tools-12-8 && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
RUN ln -s /usr/local/cuda/lib64/libcublas.so.12 /usr/local/cuda/lib64/libcublas.so
RUN ln -s /usr/local/cuda/lib64/libcublasLt.so.12 /usr/local/cuda/lib64/libcublasLt.so
### COPY NATS & ETCD ###
# Copy nats and etcd from dev image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for complilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
# Copy uv to system /bin
COPY --from=framework /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo USER dynamo
ENV HOME=/home/dynamo ENV HOME=/home/dynamo
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
### VIRTUAL ENVIRONMENT SETUP ###
# Copy entire virtual environment from framework container with correct ownership
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy vllm with correct ownership
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm
# Copy UCX and NIXL to system directories # Copy UCX and NIXL to system directories
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
...@@ -260,16 +265,12 @@ $NIXL_PLUGIN_DIR:\ ...@@ -260,16 +265,12 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH $LD_LIBRARY_PATH
### VIRTUAL ENVIRONMENT SETUP ### # Copy local files
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Copy entire virtual environment from framework container with correct ownership COPY --chown=dynamo: benchmarks/ /workspace/benchmarks/
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy vllm with correct ownership
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm
# Install dynamo, NIXL, and dynamo-specific dependencies # Install dynamo, NIXL, and dynamo-specific dependencies
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/ ARG ENABLE_KVBM
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
...@@ -278,10 +279,8 @@ RUN uv pip install \ ...@@ -278,10 +279,8 @@ RUN uv pip install \
&& if [ "${ENABLE_KVBM}" = "true" ]; then \ && if [ "${ENABLE_KVBM}" = "true" ]; then \
uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \ uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \
fi \ fi \
&& cd /opt/dynamo/benchmarks \ && cd /workspace/benchmarks \
&& UV_GIT_LFS=1 uv pip install --no-cache . \ && UV_GIT_LFS=1 uv pip install --no-cache .
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies # Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
...@@ -291,11 +290,13 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi ...@@ -291,11 +290,13 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.test.txt
# Copy benchmarks, examples, and tests for CI with correct ownership # Copy tests, benchmarks, deploy and components for CI
COPY --chown=dynamo: . /workspace/ COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
# Copy attribution files COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/ COPY --chown=dynamo: recipes/ /workspace/recipes/
COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: lib/ /workspace/lib/
# Setup launch banner in common directory accessible to all users # Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
...@@ -308,6 +309,8 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \ ...@@ -308,6 +309,8 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo USER dynamo
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -330,9 +333,6 @@ FROM runtime AS dev ...@@ -330,9 +333,6 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid. # Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace ARG WORKSPACE_DIR=/workspace
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
USER root USER root
# Install utilities as root # Install utilities as root
RUN apt-get update -y && \ RUN apt-get update -y && \
......
...@@ -882,10 +882,10 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then ...@@ -882,10 +882,10 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
# Use BuildKit for enhanced metadata # Use BuildKit for enhanced metadata
if [ -z "$RUN_PREFIX" ]; then if [ -z "$RUN_PREFIX" ]; then
if docker buildx version &>/dev/null; then if docker buildx version &>/dev/null; then
docker buildx build --progress=plain --load -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${BASE_BUILD_LOG}" docker buildx build --progress=plain --load -f "${SOURCE_DIR}/Dockerfile" --target runtime $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${BASE_BUILD_LOG}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
else else
DOCKER_BUILDKIT=1 docker build --progress=plain -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${BASE_BUILD_LOG}" DOCKER_BUILDKIT=1 docker build --progress=plain -f "${SOURCE_DIR}/Dockerfile" --target runtime $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${BASE_BUILD_LOG}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
fi fi
...@@ -893,7 +893,7 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then ...@@ -893,7 +893,7 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
exit ${BUILD_EXIT_CODE} exit ${BUILD_EXIT_CODE}
fi fi
else else
$RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE $RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target runtime $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
fi fi
# Start framework build # Start framework build
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment