Unverified commit 6f708832, authored by Anant Sharma, committed by GitHub
Browse files

build: add non root user for dynamo backend runtime containers (#3935)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 5e4a339a
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
"SPDX-License-Identifier: Apache-2.0" "SPDX-License-Identifier: Apache-2.0"
], ],
"name": "Dynamo {{ framework.upper() }} Dev Container", "name": "Dynamo {{ framework.upper() }} Dev Container",
"remoteUser": "ubuntu", // Matches our container user "remoteUser": "dynamo", // Matches our container user
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors "updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
"image": "dynamo:latest-{{ framework }}-local-dev", // Use the latest {{ framework.upper() }} dev image "image": "dynamo:latest-{{ framework }}-local-dev", // Use the latest {{ framework.upper() }} dev image
"runArgs": [ "runArgs": [
...@@ -68,11 +68,11 @@ ...@@ -68,11 +68,11 @@
}, },
"mounts": [ "mounts": [
// These are for convenience, so that the history and pre-commit cache are persisted between sessions // These are for convenience, so that the history and pre-commit cache are persisted between sessions
"source=dynamo-bashhistory,target=/home/ubuntu/.commandhistory,type=volume", "source=dynamo-bashhistory,target=/home/dynamo/.commandhistory,type=volume",
"source=dynamo-precommit-cache,target=/home/ubuntu/.cache/pre-commit,type=volume", "source=dynamo-precommit-cache,target=/home/dynamo/.cache/pre-commit,type=volume",
// Default mounts // Default mounts
"source=/tmp/,target=/tmp/,type=bind" "source=/tmp/,target=/tmp/,type=bind"
// Uncomment this to reuse your Hugging Face cache // Uncomment this to reuse your Hugging Face cache
//"source=${localEnv:HOME}/.cache/huggingface,target=/home/ubuntu/.cache/huggingface,type=bind" //"source=${localEnv:HOME}/.cache/huggingface,target=/home/dynamo/.cache/huggingface,type=bind"
] ]
} }
...@@ -54,18 +54,23 @@ runs: ...@@ -54,18 +54,23 @@ runs:
# Run pytest with detailed output and JUnit XML # Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures set +e # Don't exit on test failures
docker run --runtime=nvidia --rm --gpus all -w /workspace \ docker run --runtime=nvidia --gpus all -w /workspace \
--cpus=${NUM_CPUS} \ --cpus=${NUM_CPUS} \
--network host \ --network host \
--name ${{ env.CONTAINER_ID }}_pytest \ --name ${{ env.CONTAINER_ID }}_pytest \
-v "$(pwd)/test-results:/test-results" \
${{ inputs.image_tag }} \ ${{ inputs.image_tag }} \
bash -c "pytest -v --tb=short --basetemp=/tmp --junitxml=/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\"" bash -c "mkdir -p /workspace/test-results && pytest -v --tb=short --basetemp=/tmp -o cache_dir=/tmp/.pytest_cache --junitxml=/workspace/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
TEST_EXIT_CODE=$? TEST_EXIT_CODE=$?
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}" echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
# Copy test results from container to host
docker cp ${{ env.CONTAINER_ID }}_pytest:/workspace/test-results . || echo "Failed to copy test results"
# Clean up container
docker rm -f ${{ env.CONTAINER_ID }}_pytest || echo "Failed to clean up container"
# Always continue to results processing # Always continue to results processing
exit 0 exit 0
......
...@@ -49,7 +49,7 @@ jobs: ...@@ -49,7 +49,7 @@ jobs:
docker compose up -d nats-server etcd-server docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests) - name: Run Rust checks (block-manager + integration tests)
run: | run: |
docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \ docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \ --name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \ ${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \ bash -ec 'rustup component add rustfmt clippy && \
...@@ -66,7 +66,7 @@ jobs: ...@@ -66,7 +66,7 @@ jobs:
env: env:
PYTEST_MARKS: "pre_merge or mypy" PYTEST_MARKS: "pre_merge or mypy"
run: | run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace \ docker run -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \ --name ${{ env.CONTAINER_ID }}_pytest \
${{ steps.define_image_tag.outputs.image_tag }} \ ${{ steps.define_image_tag.outputs.image_tag }} \
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\" " bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\" "
......
...@@ -335,23 +335,6 @@ ARG ARCH_ALT ...@@ -335,23 +335,6 @@ ARG ARCH_ALT
ENV DYNAMO_HOME=/opt/dynamo \ ENV DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target CARGO_TARGET_DIR=/opt/dynamo/target
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
RUN apt-get update -y \ RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
# required for AIC perf files # required for AIC perf files
...@@ -361,13 +344,29 @@ RUN apt-get update -y \ ...@@ -361,13 +344,29 @@ RUN apt-get update -y \
clang \ clang \
libclang-dev \ libclang-dev \
protobuf-compiler \ protobuf-compiler \
# sudo for dev stage
sudo \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/* \
# Add sudo privileges to dynamo user
&& echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \
&& chmod 0440 /etc/sudoers.d/dynamo
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
# Create and activate virtual environment # Create and activate virtual environment
ARG PYTHON_VERSION ARG PYTHON_VERSION
RUN mkdir -p /opt/dynamo/venv && \ RUN uv venv /opt/dynamo/venv --python $PYTHON_VERSION
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
ENV VIRTUAL_ENV=/opt/dynamo/venv \ ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}" PATH="/opt/dynamo/venv/bin:${PATH}"
...@@ -380,7 +379,25 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi ...@@ -380,7 +379,25 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.test.txt
COPY benchmarks/ /opt/dynamo/benchmarks/ # NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY --chown=dynamo: ./ /workspace/
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...@@ -388,16 +405,20 @@ RUN uv pip install \ ...@@ -388,16 +405,20 @@ RUN uv pip install \
if [ "$ENABLE_KVBM" = "true" ]; then \ if [ "$ENABLE_KVBM" = "true" ]; then \
uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \ uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \
fi \ fi \
&& cd /opt/dynamo/benchmarks \ && cd /workspace/benchmarks \
&& UV_GIT_LFS=1 uv pip install --no-cache . \ && UV_GIT_LFS=1 uv pip install --no-cache .
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
# Setup launch banner # Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \ sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc # Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
ARG DEV_BASE="" ARG DEV_BASE=""
FROM ${DEV_BASE} AS local-dev FROM ${DEV_BASE} AS local-dev
# Don't want ubuntu to be editable, just change uid and gid. # Don't want dynamo to be editable, just change uid and gid.
ENV USERNAME=ubuntu ENV USERNAME=dynamo
ARG USER_UID ARG USER_UID
ARG USER_GID ARG USER_GID
ARG WORKSPACE_DIR=/workspace ARG WORKSPACE_DIR=/workspace
...@@ -50,7 +50,10 @@ RUN apt-get install -y sudo gnupg2 gnupg1 \ ...@@ -50,7 +50,10 @@ RUN apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \ && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \ && chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \ && mkdir -p /home/$USERNAME \
&& groupmod -g $USER_GID $USERNAME \ # Handle GID conflicts: if target GID exists and it's not our group, remove it
&& (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
# Create group if it doesn't exist, otherwise modify existing group
&& (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
&& usermod -u $USER_UID -g $USER_GID $USERNAME \ && usermod -u $USER_UID -g $USER_GID $USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \ && chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& chsh -s /bin/bash $USERNAME && chsh -s /bin/bash $USERNAME
......
...@@ -187,6 +187,17 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr ...@@ -187,6 +187,17 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr
# Fix DeepEP IBGDA symlink # Fix DeepEP IBGDA symlink
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Install SGLang (requires CUDA 12.8.1 or 12.9.1) # Install SGLang (requires CUDA 12.8.1 or 12.9.1)
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
&& git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \ && git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
...@@ -202,7 +213,7 @@ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptool ...@@ -202,7 +213,7 @@ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptool
&& FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200) # Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
RUN --mount=type=cache,target=/var/cache/curl \ RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \ curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \ && tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& mv nvshmem_src nvshmem \ && mv nvshmem_src nvshmem \
...@@ -345,40 +356,50 @@ COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server ...@@ -345,40 +356,50 @@ COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
# Install Dynamo wheels from dynamo_base wheelhouse # Install Dynamo wheels from dynamo_base wheelhouse
COPY benchmarks/ /opt/dynamo/benchmarks/ COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN pip install \ RUN python3 -m pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \ /opt/dynamo/wheelhouse/nixl/nixl*.whl \
&& cd /opt/dynamo/benchmarks \ && cd /opt/dynamo/benchmarks \
&& pip install --no-cache . \ && python3 -m pip install --no-cache . \
&& cd - \ && cd - \
&& rm -rf /opt/dynamo/benchmarks && rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies # Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
pip install \ python3 -m pip install \
--no-cache \ --no-cache \
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.test.txt
## Copy attribution files and launch banner ## Copy attribution files and launch banner with correct ownership
COPY ATTRIBUTION* LICENSE /workspace/ COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
COPY container/launch_message.txt /workspace/launch_message.txt COPY --chown=dynamo: container/launch_message.txt /workspace/launch_message.txt
RUN sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc # Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Copy tests, benchmarks, deploy and components for CI # Setup environment for all users
COPY tests /workspace/tests USER root
COPY examples /workspace/examples RUN chmod 755 /opt/dynamo/.launch_screen && \
COPY benchmarks /workspace/benchmarks echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
COPY deploy /workspace/deploy echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
COPY components/ /workspace/components/
USER dynamo
# Copy tests, benchmarks, deploy and components for CI with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -412,6 +433,7 @@ RUN mkdir -p /opt/dynamo/venv && \ ...@@ -412,6 +433,7 @@ RUN mkdir -p /opt/dynamo/venv && \
ENV VIRTUAL_ENV=/opt/dynamo/venv \ ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}" PATH="/opt/dynamo/venv/bin:${PATH}"
USER root
# Install development tools and utilities # Install development tools and utilities
RUN apt-get update -y && \ RUN apt-get update -y && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
...@@ -450,6 +472,8 @@ RUN apt-get update -y && \ ...@@ -450,6 +472,8 @@ RUN apt-get update -y && \
silversearcher-ag \ silversearcher-ag \
cloc \ cloc \
locales \ locales \
# sudo for dev stage
sudo \
# NVIDIA tools dependencies # NVIDIA tools dependencies
gnupg && \ gnupg && \
echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \ echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
...@@ -469,10 +493,10 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git ...@@ -469,10 +493,10 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git
# Editable install of dynamo # Editable install of dynamo
COPY pyproject.toml README.md hatch_build.py /workspace/ COPY pyproject.toml README.md hatch_build.py /workspace/
RUN pip install --no-deps -e . RUN python3 -m pip install --no-deps -e .
# Install Python development packages # Install Python development packages
RUN pip install --no-cache-dir \ RUN python3 -m pip install --no-cache-dir \
maturin[patchelf] \ maturin[patchelf] \
pytest \ pytest \
black \ black \
......
This diff is collapsed.
...@@ -222,15 +222,28 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ ...@@ -222,15 +222,28 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy UCX from dev image as plugin for NIXL # DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# Copy NIXL source from dev image # is not properly set for compilation. Set CPATH to help nvcc find the headers.
# Copy dynamo wheels for gitlab artifacts ENV CPATH=/usr/local/cuda/include
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries # Copy uv to system /bin
COPY --from=framework /opt/vllm /opt/vllm COPY --from=framework /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Copy UCX and NIXL to system directories
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
...@@ -240,19 +253,17 @@ $NIXL_PLUGIN_DIR:\ ...@@ -240,19 +253,17 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH $LD_LIBRARY_PATH
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
# Copy uv and entire virtual environment from framework container # Copy entire virtual environment from framework container with correct ownership
COPY --from=framework /bin/uv /bin/uvx /bin/ COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy vllm with correct ownership
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm
# Install dynamo, NIXL, and dynamo-specific dependencies # Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/ COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...@@ -273,16 +284,23 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi ...@@ -273,16 +284,23 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.test.txt
# Copy benchmarks, examples, and tests for CI # Copy benchmarks, examples, and tests for CI with correct ownership
COPY . /workspace/ COPY --chown=dynamo: . /workspace/
# Copy attribution files # Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/ COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ # Setup launch banner in common directory accessible to all users
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
echo "cat ~/.launch_screen" >> ~/.bashrc && \ sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -305,6 +323,7 @@ FROM runtime AS dev ...@@ -305,6 +323,7 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid. # Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace ARG WORKSPACE_DIR=/workspace
USER root
# Install utilities as root # Install utilities as root
RUN apt-get update -y && \ RUN apt-get update -y && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
......
...@@ -555,7 +555,7 @@ build_local_dev_with_header() { ...@@ -555,7 +555,7 @@ build_local_dev_with_header() {
fi fi
echo "Building new local-dev image from: $dev_base_image" echo "Building new local-dev image from: $dev_base_image"
echo "User 'ubuntu' will have UID: $USER_UID, GID: $USER_GID" echo "User 'dynamo' will have UID: $USER_UID, GID: $USER_GID"
# Show the docker command being executed if not in dry-run mode # Show the docker command being executed if not in dry-run mode
if [ -z "$RUN_PREFIX" ]; then if [ -z "$RUN_PREFIX" ]; then
......
...@@ -45,6 +45,7 @@ USE_NIXL_GDS= ...@@ -45,6 +45,7 @@ USE_NIXL_GDS=
RUNTIME=nvidia RUNTIME=nvidia
WORKDIR=/workspace WORKDIR=/workspace
NETWORK=host NETWORK=host
USER=
get_options() { get_options() {
while :; do while :; do
...@@ -183,6 +184,14 @@ get_options() { ...@@ -183,6 +184,14 @@ get_options() {
missing_requirement "$1" missing_requirement "$1"
fi fi
;; ;;
--user)
if [ "$2" ]; then
USER=$2
shift
else
missing_requirement "$1"
fi
;;
--dry-run) --dry-run)
RUN_PREFIX="echo" RUN_PREFIX="echo"
echo "" echo ""
...@@ -267,11 +276,10 @@ get_options() { ...@@ -267,11 +276,10 @@ get_options() {
if [ -n "$HF_HOME" ]; then if [ -n "$HF_HOME" ]; then
mkdir -p "$HF_HOME" mkdir -p "$HF_HOME"
# Use /home/ubuntu for local-dev target, /root for dev target. if [[ ${USER} == "root" ]] || [[ ${USER} == "0" ]]; then
if [ "$TARGET" = "local-dev" ] || [[ "$IMAGE" == *"local-dev"* ]]; then
HF_HOME_TARGET="/home/ubuntu/.cache/huggingface"
else
HF_HOME_TARGET="/root/.cache/huggingface" HF_HOME_TARGET="/root/.cache/huggingface"
else
HF_HOME_TARGET="/home/dynamo/.cache/huggingface"
fi fi
VOLUME_MOUNTS+=" -v $HF_HOME:$HF_HOME_TARGET" VOLUME_MOUNTS+=" -v $HF_HOME:$HF_HOME_TARGET"
fi fi
...@@ -313,6 +321,12 @@ get_options() { ...@@ -313,6 +321,12 @@ get_options() {
RUNTIME="" RUNTIME=""
fi fi
if [[ ${USER} == "" ]]; then
USER_STRING=""
else
USER_STRING="--user ${USER}"
fi
REMAINING_ARGS=("$@") REMAINING_ARGS=("$@")
} }
...@@ -330,6 +344,8 @@ show_help() { ...@@ -330,6 +344,8 @@ show_help() {
echo " Options: 'host' (default), 'bridge', 'none', 'container:name'" echo " Options: 'host' (default), 'bridge', 'none', 'container:name'"
echo " Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)" echo " Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)"
echo " --network container:redis (share network with 'redis' container)" echo " --network container:redis (share network with 'redis' container)"
echo " [--user <name|uid>[:<group|gid>] specify user to run container as]"
echo " Format: username or numeric UID, optionally with group/GID (e.g., 'root', '0', '1000:0')"
echo " [-v add volume mount]" echo " [-v add volume mount]"
echo " [-p|--port add port mapping (host_port:container_port)]" echo " [-p|--port add port mapping (host_port:container_port)]"
echo " [-e add environment variable]" echo " [-e add environment variable]"
...@@ -376,6 +392,7 @@ ${RUN_PREFIX} docker run \ ...@@ -376,6 +392,7 @@ ${RUN_PREFIX} docker run \
${NIXL_GDS_CAPS} \ ${NIXL_GDS_CAPS} \
--ipc host \ --ipc host \
${PRIVILEGED_STRING} \ ${PRIVILEGED_STRING} \
${USER_STRING} \
${NAME_STRING} \ ${NAME_STRING} \
${ENTRYPOINT_STRING} \ ${ENTRYPOINT_STRING} \
${IMAGE} \ ${IMAGE} \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment