Unverified Commit 6f708832 authored by Anant Sharma's avatar Anant Sharma Committed by GitHub
Browse files

build: add non root user for dynamo backend runtime containers (#3935)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
parent 5e4a339a
......@@ -5,7 +5,7 @@
"SPDX-License-Identifier: Apache-2.0"
],
"name": "Dynamo {{ framework.upper() }} Dev Container",
"remoteUser": "ubuntu", // Matches our container user
"remoteUser": "dynamo", // Matches our container user
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
"image": "dynamo:latest-{{ framework }}-local-dev", // Use the latest {{ framework.upper() }} dev image
"runArgs": [
......@@ -68,11 +68,11 @@
},
"mounts": [
// These are for convenience, so that the history and pre-commit cache are persisted between sessions
"source=dynamo-bashhistory,target=/home/ubuntu/.commandhistory,type=volume",
"source=dynamo-precommit-cache,target=/home/ubuntu/.cache/pre-commit,type=volume",
"source=dynamo-bashhistory,target=/home/dynamo/.commandhistory,type=volume",
"source=dynamo-precommit-cache,target=/home/dynamo/.cache/pre-commit,type=volume",
// Default mounts
"source=/tmp/,target=/tmp/,type=bind"
// Uncomment this to reuse your Hugging Face cache
//"source=${localEnv:HOME}/.cache/huggingface,target=/home/ubuntu/.cache/huggingface,type=bind"
//"source=${localEnv:HOME}/.cache/huggingface,target=/home/dynamo/.cache/huggingface,type=bind"
]
}
......@@ -54,18 +54,23 @@ runs:
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
docker run --runtime=nvidia --rm --gpus all -w /workspace \
docker run --runtime=nvidia --gpus all -w /workspace \
--cpus=${NUM_CPUS} \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
-v "$(pwd)/test-results:/test-results" \
${{ inputs.image_tag }} \
bash -c "pytest -v --tb=short --basetemp=/tmp --junitxml=/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
bash -c "mkdir -p /workspace/test-results && pytest -v --tb=short --basetemp=/tmp -o cache_dir=/tmp/.pytest_cache --junitxml=/workspace/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
TEST_EXIT_CODE=$?
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
# Copy test results from container to host
docker cp ${{ env.CONTAINER_ID }}_pytest:/workspace/test-results . || echo "Failed to copy test results"
# Clean up container
docker rm -f ${{ env.CONTAINER_ID }}_pytest || echo "Failed to clean up container"
# Always continue to results processing
exit 0
......
......@@ -49,7 +49,7 @@ jobs:
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests)
run: |
docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
......@@ -66,7 +66,7 @@ jobs:
env:
PYTEST_MARKS: "pre_merge or mypy"
run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace \
docker run -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\" "
......
......@@ -335,23 +335,6 @@ ARG ARCH_ALT
ENV DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
# required for AIC perf files
......@@ -361,13 +344,29 @@ RUN apt-get update -y \
clang \
libclang-dev \
protobuf-compiler \
# sudo for dev stage
sudo \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
# Add sudo privileges to dynamo user
&& echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \
&& chmod 0440 /etc/sudoers.d/dynamo
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
# Create and activate virtual environment
ARG PYTHON_VERSION
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
RUN uv venv /opt/dynamo/venv --python $PYTHON_VERSION
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
......@@ -380,7 +379,25 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
COPY benchmarks/ /opt/dynamo/benchmarks/
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY --chown=dynamo: ./ /workspace/
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
......@@ -388,16 +405,20 @@ RUN uv pip install \
if [ "$ENABLE_KVBM" = "true" ]; then \
uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \
fi \
&& cd /opt/dynamo/benchmarks \
&& UV_GIT_LFS=1 uv pip install --no-cache . \
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
&& cd /workspace/benchmarks \
&& UV_GIT_LFS=1 uv pip install --no-cache .
# Setup launch banner
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -14,8 +14,8 @@
ARG DEV_BASE=""
FROM ${DEV_BASE} AS local-dev
# Don't want ubuntu to be editable, just change uid and gid.
ENV USERNAME=ubuntu
# Don't want dynamo to be editable, just change uid and gid.
ENV USERNAME=dynamo
ARG USER_UID
ARG USER_GID
ARG WORKSPACE_DIR=/workspace
......@@ -50,7 +50,10 @@ RUN apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& groupmod -g $USER_GID $USERNAME \
# Handle GID conflicts: if target GID exists and it's not our group, remove it
&& (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
# Create group if it doesn't exist, otherwise modify existing group
&& (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& chsh -s /bin/bash $USERNAME
......
......@@ -187,6 +187,17 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr
# Fix DeepEP IBGDA symlink
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
&& git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
......@@ -202,7 +213,7 @@ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptool
&& FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
RUN --mount=type=cache,target=/var/cache/curl \
RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& mv nvshmem_src nvshmem \
......@@ -345,40 +356,50 @@ COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
# Install Dynamo wheels from dynamo_base wheelhouse
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN pip install \
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN python3 -m pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
&& cd /opt/dynamo/benchmarks \
&& pip install --no-cache . \
&& python3 -m pip install --no-cache . \
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
pip install \
python3 -m pip install \
--no-cache \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
## Copy attribution files and launch banner
COPY ATTRIBUTION* LICENSE /workspace/
COPY container/launch_message.txt /workspace/launch_message.txt
RUN sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
## Copy attribution files and launch banner with correct ownership
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
COPY --chown=dynamo: container/launch_message.txt /workspace/launch_message.txt
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Copy tests, benchmarks, deploy and components for CI
COPY tests /workspace/tests
COPY examples /workspace/examples
COPY benchmarks /workspace/benchmarks
COPY deploy /workspace/deploy
COPY components/ /workspace/components/
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
# Copy tests, benchmarks, deploy and components for CI with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -412,6 +433,7 @@ RUN mkdir -p /opt/dynamo/venv && \
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
USER root
# Install development tools and utilities
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
......@@ -450,6 +472,8 @@ RUN apt-get update -y && \
silversearcher-ag \
cloc \
locales \
# sudo for dev stage
sudo \
# NVIDIA tools dependencies
gnupg && \
echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
......@@ -469,10 +493,10 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git
# Editable install of dynamo
COPY pyproject.toml README.md hatch_build.py /workspace/
RUN pip install --no-deps -e .
RUN python3 -m pip install --no-deps -e .
# Install Python development packages
RUN pip install --no-cache-dir \
RUN python3 -m pip install --no-cache-dir \
maturin[patchelf] \
pytest \
black \
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.06-py3"
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG PYTORCH_BASE_IMAGE_TAG="25.06-py3"
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
......@@ -34,33 +37,22 @@ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Copy artifacts from NGC PyTorch image
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base
##################################################
########## Runtime Image ########################
########## Framework Builder Stage ##############
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
# PURPOSE: Build TensorRT-LLM with root privileges
#
# Use this stage when you need:
# - Production deployment of Dynamo with TensorRT-LLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
# This stage handles TensorRT-LLM installation which requires:
# - Root access for apt operations (CUDA repos, TensorRT installation)
# - System-level modifications in install_tensorrt.sh
# - Virtual environment population with PyTorch and TensorRT-LLM
#
# The completed venv is then copied to runtime stage with dynamo ownership
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
ARG ARCH_ALT
ARG PYTHON_VERSION
......@@ -68,93 +60,27 @@ ARG ENABLE_KVBM
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install Python, build-essential and python3-dev as apt dependencies
# Install minimal dependencies needed for TensorRT-LLM installation
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Build tools (required for JIT kernel compilation)
build-essential \
g++ \
ninja-build \
git \
git-lfs \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
python3-pip \
# CUDA/ML libraries
libcudnn9-cuda-12 \
# Network and communication libraries
libzmq3-dev \
# RDMA/UCX libraries required to find RDMA devices
ibverbs-providers \
ibverbs-utils \
libibumad3 \
libibverbs1 \
libnuma1 \
librdmacm1 \
rdma-core \
# OpenMPI dependencies
openssh-client \
openssh-server \
# System utilities
ca-certificates \
curl \
jq \
wget && \
git \
git-lfs \
ca-certificates && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/lib/lib* /usr/local/lib/
### COPY NATS & ETCD ###
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy UCX from framework image as plugin for NIXL
# Copy NIXL source from framework image
# Copy dynamo wheels for gitlab artifacts
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
# Copy OpenMPI from framework image
COPY --from=framework /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from framework image
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
ENV DYNAMO_HOME=/workspace
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV OPAL_PREFIX=/opt/hpcx/ompi
### VIRTUAL ENVIRONMENT SETUP ###
# Copy uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin
# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
# Copy pytorch installation from NGC PyTorch
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
......@@ -167,25 +93,22 @@ ARG PACKAGING_VER=23.2
ARG FLASH_ATTN_VER=2.7.4.post1
ARG MPMATH_VER=1.3.0
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
ENV ENV=${ENV:-/etc/shinit_v2}
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
......@@ -235,12 +158,146 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
fi
##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with TensorRT-LLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
ARG ARCH_ALT
ARG PYTHON_VERSION
WORKDIR /workspace
ENV ENV=${ENV:-/etc/shinit_v2}
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Build tools
build-essential \
g++ \
ninja-build \
git \
git-lfs \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
python3-pip \
# CUDA/ML libraries
libcudnn9-cuda-12 \
# Network and communication libraries
libzmq3-dev \
# RDMA/UCX libraries required to find RDMA devices
ibverbs-providers \
ibverbs-utils \
libibumad3 \
libibverbs1 \
libnuma1 \
librdmacm1 \
rdma-core \
# OpenMPI dependencies
openssh-client \
openssh-server \
# System utilities and dependencies
curl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image
COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy OpenMPI from PyTorch base image
COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from PyTorch base image
COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=pytorch_base /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy uv to system /bin
COPY --from=framework /bin/uv /bin/uvx /bin/
# Copy libgomp.so from framework image
COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
ENV DYNAMO_HOME=/workspace
# Copy UCX from framework image as plugin for NIXL
# Copy NIXL source from framework image
# Copy dynamo wheels for gitlab artifacts
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi
# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
......@@ -261,31 +318,28 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=framework /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=framework /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy tests, benchmarks, deploy and components for CI with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: recipes/ /workspace/recipes/
# Copy tests, benchmarks, deploy and components for CI
COPY tests /workspace/tests
COPY examples /workspace/examples
COPY benchmarks /workspace/benchmarks
COPY deploy /workspace/deploy
COPY components/ /workspace/components/
COPY recipes/ /workspace/recipes/
# Copy attribution files with correct ownership
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -308,6 +362,9 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace
# Switch to root for system package installation
USER root
# Install utilities as root
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
......
......@@ -222,15 +222,28 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy UCX from dev image as plugin for NIXL
# Copy NIXL source from devr image
# Copy dynamo wheels for gitlab artifacts
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for complilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
COPY --from=framework /opt/vllm /opt/vllm
# Copy uv to system /bin
COPY --from=framework /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Copy UCX and NIXL to system directories
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
......@@ -240,19 +253,17 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for complilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
### VIRTUAL ENVIRONMENT SETUP ###
# Copy uv and entire virtual environment from framework container
COPY --from=framework /bin/uv /bin/uvx /bin/
COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy entire virtual environment from framework container with correct ownership
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy vllm with correct ownership
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
......@@ -273,16 +284,23 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
# Copy benchmarks, examples, and tests for CI
COPY . /workspace/
# Copy benchmarks, examples, and tests for CI with correct ownership
COPY --chown=dynamo: . /workspace/
# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -305,6 +323,7 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace
USER root
# Install utilities as root
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
......
......@@ -555,7 +555,7 @@ build_local_dev_with_header() {
fi
echo "Building new local-dev image from: $dev_base_image"
echo "User 'ubuntu' will have UID: $USER_UID, GID: $USER_GID"
echo "User 'dynamo' will have UID: $USER_UID, GID: $USER_GID"
# Show the docker command being executed if not in dry-run mode
if [ -z "$RUN_PREFIX" ]; then
......
......@@ -45,6 +45,7 @@ USE_NIXL_GDS=
RUNTIME=nvidia
WORKDIR=/workspace
NETWORK=host
USER=
get_options() {
while :; do
......@@ -183,6 +184,14 @@ get_options() {
missing_requirement "$1"
fi
;;
--user)
if [ "$2" ]; then
USER=$2
shift
else
missing_requirement "$1"
fi
;;
--dry-run)
RUN_PREFIX="echo"
echo ""
......@@ -267,11 +276,10 @@ get_options() {
if [ -n "$HF_HOME" ]; then
mkdir -p "$HF_HOME"
# Use /home/ubuntu for local-dev target, /root for dev target.
if [ "$TARGET" = "local-dev" ] || [[ "$IMAGE" == *"local-dev"* ]]; then
HF_HOME_TARGET="/home/ubuntu/.cache/huggingface"
else
if [[ ${USER} == "root" ]] || [[ ${USER} == "0" ]]; then
HF_HOME_TARGET="/root/.cache/huggingface"
else
HF_HOME_TARGET="/home/dynamo/.cache/huggingface"
fi
VOLUME_MOUNTS+=" -v $HF_HOME:$HF_HOME_TARGET"
fi
......@@ -313,6 +321,12 @@ get_options() {
RUNTIME=""
fi
if [[ ${USER} == "" ]]; then
USER_STRING=""
else
USER_STRING="--user ${USER}"
fi
REMAINING_ARGS=("$@")
}
......@@ -330,6 +344,8 @@ show_help() {
echo " Options: 'host' (default), 'bridge', 'none', 'container:name'"
echo " Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)"
echo " --network container:redis (share network with 'redis' container)"
echo " [--user <name|uid>[:<group|gid>] specify user to run container as]"
echo " Format: username or numeric UID, optionally with group/GID (e.g., 'root', '0', '1000:0')"
echo " [-v add volume mount]"
echo " [-p|--port add port mapping (host_port:container_port)]"
echo " [-e add environment variable]"
......@@ -376,6 +392,7 @@ ${RUN_PREFIX} docker run \
${NIXL_GDS_CAPS} \
--ipc host \
${PRIVILEGED_STRING} \
${USER_STRING} \
${NAME_STRING} \
${ENTRYPOINT_STRING} \
${IMAGE} \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment