Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6f708832
Unverified
Commit
6f708832
authored
Nov 10, 2025
by
Anant Sharma
Committed by
GitHub
Nov 10, 2025
Browse files
build: add non root user for dynamo backend runtime containers (#3935)
Signed-off-by: Anant Sharma <anants@nvidia.com>
parent
5e4a339a
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing 10 changed files with 376 additions and 230 deletions
+376
-230
.devcontainer/devcontainer.json.j2
.devcontainer/devcontainer.json.j2
+4
-4
.github/actions/pytest/action.yml
.github/actions/pytest/action.yml
+8
-3
.github/workflows/container-validation-dynamo.yml
.github/workflows/container-validation-dynamo.yml
+2
-2
container/Dockerfile
container/Dockerfile
+50
-29
container/Dockerfile.local_dev
container/Dockerfile.local_dev
+6
-3
container/Dockerfile.sglang
container/Dockerfile.sglang
+44
-20
container/Dockerfile.trtllm
container/Dockerfile.trtllm
+196
-139
container/Dockerfile.vllm
container/Dockerfile.vllm
+44
-25
container/build.sh
container/build.sh
+1
-1
container/run.sh
container/run.sh
+21
-4
No files found.
.devcontainer/devcontainer.json.j2
View file @
6f708832
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
"SPDX-License-Identifier: Apache-2.0"
"SPDX-License-Identifier: Apache-2.0"
],
],
"name": "Dynamo {{ framework.upper() }} Dev Container",
"name": "Dynamo {{ framework.upper() }} Dev Container",
"remoteUser": "ubuntu", // Matches our container user
"remoteUser": "dynamo", // Matches our container user
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
"image": "dynamo:latest-{{ framework }}-local-dev", // Use the latest {{ framework.upper() }} dev image
"image": "dynamo:latest-{{ framework }}-local-dev", // Use the latest {{ framework.upper() }} dev image
"runArgs": [
"runArgs": [
...
@@ -68,11 +68,11 @@
...
@@ -68,11 +68,11 @@
},
},
"mounts": [
"mounts": [
// These are for convenience, so that the history and pre-commit cache are persisted between sessions
// These are for convenience, so that the history and pre-commit cache are persisted between sessions
"source=dynamo-bashhistory,target=/home/ubuntu/.commandhistory,type=volume",
"source=dynamo-bashhistory,target=/home/dynamo/.commandhistory,type=volume",
"source=dynamo-precommit-cache,target=/home/ubuntu/.cache/pre-commit,type=volume",
"source=dynamo-precommit-cache,target=/home/dynamo/.cache/pre-commit,type=volume",
// Default mounts
// Default mounts
"source=/tmp/,target=/tmp/,type=bind"
"source=/tmp/,target=/tmp/,type=bind"
// Uncomment this to reuse your Hugging Face cache
// Uncomment this to reuse your Hugging Face cache
//"source=${localEnv:HOME}/.cache/huggingface,target=/home/ubuntu/.cache/huggingface,type=bind"
//"source=${localEnv:HOME}/.cache/huggingface,target=/home/dynamo/.cache/huggingface,type=bind"
]
]
}
}
.github/actions/pytest/action.yml
View file @
6f708832
...
@@ -54,18 +54,23 @@ runs:
...
@@ -54,18 +54,23 @@ runs:
# Run pytest with detailed output and JUnit XML
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
set +e # Don't exit on test failures
docker run --runtime=nvidia --rm --gpus all -w /workspace \
docker run --runtime=nvidia --gpus all -w /workspace \
--cpus=${NUM_CPUS} \
--cpus=${NUM_CPUS} \
--network host \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
--name ${{ env.CONTAINER_ID }}_pytest \
-v "$(pwd)/test-results:/test-results" \
${{ inputs.image_tag }} \
${{ inputs.image_tag }} \
bash -c "pytest -v --tb=short --basetemp=/tmp --junitxml=/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
bash -c "mkdir -p /workspace/test-results && pytest -v --tb=short --basetemp=/tmp -o cache_dir=/tmp/.pytest_cache --junitxml=/workspace/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
TEST_EXIT_CODE=$?
TEST_EXIT_CODE=$?
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
# Copy test results from container to host
docker cp ${{ env.CONTAINER_ID }}_pytest:/workspace/test-results . || echo "Failed to copy test results"
# Clean up container
docker rm -f ${{ env.CONTAINER_ID }}_pytest || echo "Failed to clean up container"
# Always continue to results processing
# Always continue to results processing
exit 0
exit 0
...
...
.github/workflows/container-validation-dynamo.yml
View file @
6f708832
...
@@ -49,7 +49,7 @@ jobs:
...
@@ -49,7 +49,7 @@ jobs:
docker compose up -d nats-server etcd-server
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests)
- name: Run Rust checks (block-manager + integration tests)
run
:
|
run
:
|
docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
docker run --rm -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
bash -ec 'rustup component add rustfmt clippy && \
...
@@ -66,7 +66,7 @@ jobs:
...
@@ -66,7 +66,7 @@ jobs:
env
:
env
:
PYTEST_MARKS
:
"
pre_merge
or
mypy"
PYTEST_MARKS
:
"
pre_merge
or
mypy"
run
:
|
run
:
|
docker run -v ${{ github.workspace }}:/workspace -w /workspace \
docker run -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ steps.define_image_tag.outputs.image_tag }} \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\" "
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\" "
...
...
container/Dockerfile
View file @
6f708832
...
@@ -335,23 +335,6 @@ ARG ARCH_ALT
...
@@ -335,23 +335,6 @@ ARG ARCH_ALT
ENV
DYNAMO_HOME=/opt/dynamo \
ENV
DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target
CARGO_TARGET_DIR=/opt/dynamo/target
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
RUN
apt-get update
-y
\
RUN
apt-get update
-y
\
&&
apt-get
install
-y
--no-install-recommends
\
&&
apt-get
install
-y
--no-install-recommends
\
# required for AIC perf files
# required for AIC perf files
...
@@ -361,13 +344,29 @@ RUN apt-get update -y \
...
@@ -361,13 +344,29 @@ RUN apt-get update -y \
clang \
clang \
libclang-dev \
libclang-dev \
protobuf-compiler \
protobuf-compiler \
# sudo for dev stage
sudo \
&& apt-get clean \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
# Add sudo privileges to dynamo user
&& echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \
&& chmod 0440 /etc/sudoers.d/dynamo
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
# Create and activate virtual environment
# Create and activate virtual environment
ARG
PYTHON_VERSION
ARG
PYTHON_VERSION
RUN
mkdir
-p
/opt/dynamo/venv
&&
\
RUN
uv venv /opt/dynamo/venv
--python
$PYTHON_VERSION
uv venv /opt/dynamo/venv
--python
$PYTHON_VERSION
ENV
VIRTUAL_ENV=/opt/dynamo/venv \
ENV
VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
PATH="/opt/dynamo/venv/bin:${PATH}"
...
@@ -380,7 +379,25 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
...
@@ -380,7 +379,25 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement
/tmp/requirements.txt
\
--requirement
/tmp/requirements.txt
\
--requirement
/tmp/requirements.test.txt
--requirement
/tmp/requirements.test.txt
COPY
benchmarks/ /opt/dynamo/benchmarks/
# NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY --chown=dynamo: ./ /workspace/
RUN
uv pip
install
\
RUN
uv pip
install
\
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo
*
any.whl
\
/opt/dynamo/wheelhouse/ai_dynamo
*
any.whl
\
...
@@ -388,16 +405,20 @@ RUN uv pip install \
...
@@ -388,16 +405,20 @@ RUN uv pip install \
if
[
"
$ENABLE_KVBM
"
=
"true"
]
;
then
\
if
[
"
$ENABLE_KVBM
"
=
"true"
]
;
then
\
uv pip
install
/opt/dynamo/wheelhouse/kvbm
*
.whl
;
\
uv pip
install
/opt/dynamo/wheelhouse/kvbm
*
.whl
;
\
fi
\
fi
\
&&
cd
/opt/dynamo/benchmarks
\
&&
cd
/workspace/benchmarks
\
&&
UV_GIT_LFS
=
1 uv pip
install
--no-cache
.
\
&&
UV_GIT_LFS
=
1 uv pip
install
--no-cache
.
&&
cd
-
\
&&
rm
-rf
/opt/dynamo/benchmarks
# Setup launch banner
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT
["/opt/nvidia/nvidia_entrypoint.sh"]
ENTRYPOINT
["/opt/nvidia/nvidia_entrypoint.sh"]
CMD
[]
CMD
[]
container/Dockerfile.local_dev
View file @
6f708832
...
@@ -14,8 +14,8 @@
...
@@ -14,8 +14,8 @@
ARG DEV_BASE=""
ARG DEV_BASE=""
FROM ${DEV_BASE} AS local-dev
FROM ${DEV_BASE} AS local-dev
# Don't want ubuntu to be editable, just change uid and gid.
# Don't want dynamo to be editable, just change uid and gid.
ENV USERNAME=ubuntu
ENV USERNAME=dynamo
ARG USER_UID
ARG USER_UID
ARG USER_GID
ARG USER_GID
ARG WORKSPACE_DIR=/workspace
ARG WORKSPACE_DIR=/workspace
...
@@ -50,7 +50,10 @@ RUN apt-get install -y sudo gnupg2 gnupg1 \
...
@@ -50,7 +50,10 @@ RUN apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& groupmod -g $USER_GID $USERNAME \
# Handle GID conflicts: if target GID exists and it's not our group, remove it
&& (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
# Create group if it doesn't exist, otherwise modify existing group
&& (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& chsh -s /bin/bash $USERNAME
&& chsh -s /bin/bash $USERNAME
...
...
container/Dockerfile.sglang
View file @
6f708832
...
@@ -187,6 +187,17 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr
...
@@ -187,6 +187,17 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr
# Fix DeepEP IBGDA symlink
# Fix DeepEP IBGDA symlink
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
&& git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
&& git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
...
@@ -202,7 +213,7 @@ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptool
...
@@ -202,7 +213,7 @@ RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptool
&& FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
&& FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
RUN --mount=type=cache,target=/var/cache/curl \
RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
&& mv nvshmem_src nvshmem \
&& mv nvshmem_src nvshmem \
...
@@ -345,40 +356,50 @@ COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
...
@@ -345,40 +356,50 @@ COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
# Install Dynamo wheels from dynamo_base wheelhouse
# Install Dynamo wheels from dynamo_base wheelhouse
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN pip install \
RUN python3 -m pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
&& cd /opt/dynamo/benchmarks \
&& cd /opt/dynamo/benchmarks \
&& pip install --no-cache . \
&& python3 -m pip install --no-cache . \
&& cd - \
&& cd - \
&& rm -rf /opt/dynamo/benchmarks
&& rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies
# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
pip install \
python3 -m pip install \
--no-cache \
--no-cache \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
--requirement /tmp/requirements.test.txt
## Copy attribution files and launch banner
## Copy attribution files and launch banner with correct ownership
COPY ATTRIBUTION* LICENSE /workspace/
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
COPY container/launch_message.txt /workspace/launch_message.txt
COPY --chown=dynamo: container/launch_message.txt /workspace/launch_message.txt
RUN sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Copy tests, benchmarks, deploy and components for CI
# Setup environment for all users
COPY tests /workspace/tests
USER root
COPY examples /workspace/examples
RUN chmod 755 /opt/dynamo/.launch_screen && \
COPY benchmarks /workspace/benchmarks
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
COPY deploy /workspace/deploy
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
COPY components/ /workspace/components/
USER dynamo
# Copy tests, benchmarks, deploy and components for CI with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
CMD []
...
@@ -412,6 +433,7 @@ RUN mkdir -p /opt/dynamo/venv && \
...
@@ -412,6 +433,7 @@ RUN mkdir -p /opt/dynamo/venv && \
ENV VIRTUAL_ENV=/opt/dynamo/venv \
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
PATH="/opt/dynamo/venv/bin:${PATH}"
USER root
# Install development tools and utilities
# Install development tools and utilities
RUN apt-get update -y && \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends \
...
@@ -450,6 +472,8 @@ RUN apt-get update -y && \
...
@@ -450,6 +472,8 @@ RUN apt-get update -y && \
silversearcher-ag \
silversearcher-ag \
cloc \
cloc \
locales \
locales \
# sudo for dev stage
sudo \
# NVIDIA tools dependencies
# NVIDIA tools dependencies
gnupg && \
gnupg && \
echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
...
@@ -469,10 +493,10 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git
...
@@ -469,10 +493,10 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git
# Editable install of dynamo
# Editable install of dynamo
COPY pyproject.toml README.md hatch_build.py /workspace/
COPY pyproject.toml README.md hatch_build.py /workspace/
RUN pip install --no-deps -e .
RUN python3 -m pip install --no-deps -e .
# Install Python development packages
# Install Python development packages
RUN pip install --no-cache-dir \
RUN python3 -m pip install --no-cache-dir \
maturin[patchelf] \
maturin[patchelf] \
pytest \
pytest \
black \
black \
...
...
container/Dockerfile.trtllm
View file @
6f708832
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.06-py3"
ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG PYTORCH_BASE_IMAGE_TAG="25.06-py3"
ARG ENABLE_KVBM=false
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
...
@@ -34,33 +37,22 @@ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
...
@@ -34,33 +37,22 @@ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Copy artifacts from NGC PyTorch image
# Copy artifacts from NGC PyTorch image
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base
##################################################
##################################################
##########
Runtime Image ##########
##############
##########
Framework Builder Stage
##############
##################################################
##################################################
#
#
# PURPOSE: Production runtime environment
# PURPOSE: Build TensorRT-LLM with root privileges
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
#
# Use this stage when you need:
# This stage handles TensorRT-LLM installation which requires:
# - Production deployment of Dynamo with TensorRT-LLM
# - Root access for apt operations (CUDA repos, TensorRT installation)
# - Minimal runtime footprint without build tools
# - System-level modifications in install_tensorrt.sh
# - Ready-to-run inference server environment
# - Virtual environment population with PyTorch and TensorRT-LLM
# - Base for custom application containers
#
#
# The completed venv is then copied to runtime stage with dynamo ownership
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
WORKDIR /workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ARG ARCH_ALT
ARG ARCH_ALT
ARG PYTHON_VERSION
ARG PYTHON_VERSION
...
@@ -68,93 +60,27 @@ ARG ENABLE_KVBM
...
@@ -68,93 +60,27 @@ ARG ENABLE_KVBM
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install Python, build-essential and python3-dev as apt dependencies
# Install minimal dependencies needed for TensorRT-LLM installation
RUN apt-get update && \
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Build tools (required for JIT kernel compilation)
build-essential \
g++ \
ninja-build \
git \
git-lfs \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-dev \
python3-pip \
python3-pip \
# CUDA/ML libraries
libcudnn9-cuda-12 \
# Network and communication libraries
libzmq3-dev \
# RDMA/UCX libraries required to find RDMA devices
ibverbs-providers \
ibverbs-utils \
libibumad3 \
libibverbs1 \
libnuma1 \
librdmacm1 \
rdma-core \
# OpenMPI dependencies
openssh-client \
openssh-server \
# System utilities
ca-certificates \
curl \
curl \
jq \
git \
wget && \
git-lfs \
ca-certificates && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
# Copy uv
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/lib/lib* /usr/local/lib/
### COPY NATS & ETCD ###
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy UCX from framework image as plugin for NIXL
# Copy NIXL source from framework image
# Copy dynamo wheels for gitlab artifacts
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
# Copy OpenMPI from framework image
COPY --from=framework /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from framework image
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
ENV DYNAMO_HOME=/workspace
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV OPAL_PREFIX=/opt/hpcx/ompi
### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin
# Create virtual environment
# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
# Copy pytorch installation from NGC PyTorch
# Copy pytorch installation from NGC PyTorch
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
...
@@ -167,25 +93,22 @@ ARG PACKAGING_VER=23.2
...
@@ -167,25 +93,22 @@ ARG PACKAGING_VER=23.2
ARG FLASH_ATTN_VER=2.7.4.post1
ARG FLASH_ATTN_VER=2.7.4.post1
ARG MPMATH_VER=1.3.0
ARG MPMATH_VER=1.3.0
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
ENV ENV=${ENV:-/etc/shinit_v2}
# Install TensorRT-LLM and related dependencies
# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
ARG HAS_TRTLLM_CONTEXT
...
@@ -235,12 +158,146 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
...
@@ -235,12 +158,146 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
fi
fi
##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with TensorRT-LLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
ARG ARCH_ALT
ARG PYTHON_VERSION
WORKDIR /workspace
ENV ENV=${ENV:-/etc/shinit_v2}
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Build tools
build-essential \
g++ \
ninja-build \
git \
git-lfs \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
python3-pip \
# CUDA/ML libraries
libcudnn9-cuda-12 \
# Network and communication libraries
libzmq3-dev \
# RDMA/UCX libraries required to find RDMA devices
ibverbs-providers \
ibverbs-utils \
libibumad3 \
libibverbs1 \
libnuma1 \
librdmacm1 \
rdma-core \
# OpenMPI dependencies
openssh-client \
openssh-server \
# System utilities and dependencies
curl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image
COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy OpenMPI from PyTorch base image
COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi
# Copy NUMA library from PyTorch base image
COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=pytorch_base /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy uv to system /bin
COPY --from=framework /bin/uv /bin/uvx /bin/
# Copy TensorRT and libgomp.so from framework image
COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
ENV DYNAMO_HOME=/workspace
# Copy UCX from dynamo_base image as plugin for NIXL
# Copy NIXL source from dynamo_base image
# Copy dynamo wheels for gitlab artifacts
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi
# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
# Install dynamo, NIXL, and dynamo-specific dependencies
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY
--chown=dynamo:
benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY
--chown=dynamo:
--from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...
@@ -261,31 +318,28 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
...
@@ -261,31 +318,28 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
--requirement /tmp/requirements.test.txt
# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
# Copy tests, benchmarks, deploy and components for CI with correct ownership
COPY --from=framework /opt/hpcx /opt/hpcx
COPY --chown=dynamo: tests /workspace/tests
# This is needed to make libucc.so visible so pytorch can use it.
COPY --chown=dynamo: examples /workspace/examples
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
COPY --chown=dynamo: benchmarks /workspace/benchmarks
# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
COPY --chown=dynamo: deploy /workspace/deploy
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
COPY --chown=dynamo: components/ /workspace/components/
# pytorch-triton is copied after trtllm installation.
COPY --chown=dynamo: recipes/ /workspace/recipes/
COPY --from=framework /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
# Copy tests, benchmarks, deploy and components for CI
# Copy attribution files with correct ownership
COPY tests /workspace/tests
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
COPY examples /workspace/examples
COPY benchmarks /workspace/benchmarks
COPY deploy /workspace/deploy
COPY components/ /workspace/components/
COPY recipes/ /workspace/recipes/
# Copy attribution files
# Setup launch banner in common directory accessible to all users
COPY ATTRIBUTION* LICENSE /workspace/
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
# Copy launch banner
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
CMD []
...
@@ -308,6 +362,9 @@ FROM runtime AS dev
...
@@ -308,6 +362,9 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid.
# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace
ARG WORKSPACE_DIR=/workspace
# Switch to root for system package installation
USER root
# Install utilities as root
# Install utilities as root
RUN apt-get update -y && \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends \
...
...
container/Dockerfile.vllm
View file @
6f708832
...
@@ -222,15 +222,28 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
...
@@ -222,15 +222,28 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy UCX from dev image as plugin for NIXL
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# Copy NIXL source from dev image
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
# Copy dynamo wheels for gitlab artifacts
ENV CPATH=/usr/local/cuda/include
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
# Copy uv to system /bin
COPY --from=framework /opt/vllm /opt/vllm
COPY --from=framework /bin/uv /bin/uvx /bin/
# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
&& chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
USER dynamo
ENV HOME=/home/dynamo
# Copy UCX and NIXL to system directories
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH
ENV LD_LIBRARY_PATH=\
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
...
@@ -240,19 +253,17 @@ $NIXL_PLUGIN_DIR:\
...
@@ -240,19 +253,17 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
$LD_LIBRARY_PATH
# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include
### VIRTUAL ENVIRONMENT SETUP ###
### VIRTUAL ENVIRONMENT SETUP ###
# Copy uv and entire virtual environment from framework container
# Copy entire virtual environment from framework container with correct ownership
COPY --from=framework /bin/uv /bin/uvx /bin/
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy vllm with correct ownership
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm
# Install dynamo, NIXL, and dynamo-specific dependencies
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY
--chown=dynamo:
benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY
--chown=dynamo:
--from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...
@@ -273,16 +284,23 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
...
@@ -273,16 +284,23 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
--requirement /tmp/requirements.test.txt
# Copy benchmarks, examples, and tests for CI
# Copy benchmarks, examples, and tests for CI
with correct ownership
COPY . /workspace/
COPY
--chown=dynamo:
. /workspace/
# Copy attribution files
# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
# Setup launch banner in common directory accessible to all users
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
CMD []
...
@@ -305,6 +323,7 @@ FROM runtime AS dev
...
@@ -305,6 +323,7 @@ FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid.
# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace
ARG WORKSPACE_DIR=/workspace
USER root
# Install utilities as root
# Install utilities as root
RUN apt-get update -y && \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends \
...
...
container/build.sh
View file @
6f708832
...
@@ -555,7 +555,7 @@ build_local_dev_with_header() {
...
@@ -555,7 +555,7 @@ build_local_dev_with_header() {
fi
fi
echo
"Building new local-dev image from:
$dev_base_image
"
echo
"Building new local-dev image from:
$dev_base_image
"
echo
"User '
ubuntu
' will have UID:
$USER_UID
, GID:
$USER_GID
"
echo
"User '
dynamo
' will have UID:
$USER_UID
, GID:
$USER_GID
"
# Show the docker command being executed if not in dry-run mode
# Show the docker command being executed if not in dry-run mode
if
[
-z
"
$RUN_PREFIX
"
]
;
then
if
[
-z
"
$RUN_PREFIX
"
]
;
then
...
...
container/run.sh
View file @
6f708832
...
@@ -45,6 +45,7 @@ USE_NIXL_GDS=
...
@@ -45,6 +45,7 @@ USE_NIXL_GDS=
RUNTIME
=
nvidia
RUNTIME
=
nvidia
WORKDIR
=
/workspace
WORKDIR
=
/workspace
NETWORK
=
host
NETWORK
=
host
USER
=
get_options
()
{
get_options
()
{
while
:
;
do
while
:
;
do
...
@@ -183,6 +184,14 @@ get_options() {
...
@@ -183,6 +184,14 @@ get_options() {
missing_requirement
"
$1
"
missing_requirement
"
$1
"
fi
fi
;;
;;
--user
)
if
[
"
$2
"
]
;
then
USER
=
$2
shift
else
missing_requirement
"
$1
"
fi
;;
--dry-run
)
--dry-run
)
RUN_PREFIX
=
"echo"
RUN_PREFIX
=
"echo"
echo
""
echo
""
...
@@ -267,11 +276,10 @@ get_options() {
...
@@ -267,11 +276,10 @@ get_options() {
if
[
-n
"
$HF_HOME
"
]
;
then
if
[
-n
"
$HF_HOME
"
]
;
then
mkdir
-p
"
$HF_HOME
"
mkdir
-p
"
$HF_HOME
"
# Use /home/ubuntu for local-dev target, /root for dev target.
if
[[
${
USER
}
==
"root"
]]
||
[[
${
USER
}
==
"0"
]]
;
then
if
[
"
$TARGET
"
=
"local-dev"
]
||
[[
"
$IMAGE
"
==
*
"local-dev"
*
]]
;
then
HF_HOME_TARGET
=
"/home/ubuntu/.cache/huggingface"
else
HF_HOME_TARGET
=
"/root/.cache/huggingface"
HF_HOME_TARGET
=
"/root/.cache/huggingface"
else
HF_HOME_TARGET
=
"/home/dynamo/.cache/huggingface"
fi
fi
VOLUME_MOUNTS+
=
" -v
$HF_HOME
:
$HF_HOME_TARGET
"
VOLUME_MOUNTS+
=
" -v
$HF_HOME
:
$HF_HOME_TARGET
"
fi
fi
...
@@ -313,6 +321,12 @@ get_options() {
...
@@ -313,6 +321,12 @@ get_options() {
RUNTIME
=
""
RUNTIME
=
""
fi
fi
if
[[
${
USER
}
==
""
]]
;
then
USER_STRING
=
""
else
USER_STRING
=
"--user
${
USER
}
"
fi
REMAINING_ARGS
=(
"
$@
"
)
REMAINING_ARGS
=(
"
$@
"
)
}
}
...
@@ -330,6 +344,8 @@ show_help() {
...
@@ -330,6 +344,8 @@ show_help() {
echo
" Options: 'host' (default), 'bridge', 'none', 'container:name'"
echo
" Options: 'host' (default), 'bridge', 'none', 'container:name'"
echo
" Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)"
echo
" Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)"
echo
" --network container:redis (share network with 'redis' container)"
echo
" --network container:redis (share network with 'redis' container)"
echo
" [--user <name|uid>[:<group|gid>] specify user to run container as]"
echo
" Format: username or numeric UID, optionally with group/GID (e.g., 'root', '0', '1000:0')"
echo
" [-v add volume mount]"
echo
" [-v add volume mount]"
echo
" [-p|--port add port mapping (host_port:container_port)]"
echo
" [-p|--port add port mapping (host_port:container_port)]"
echo
" [-e add environment variable]"
echo
" [-e add environment variable]"
...
@@ -376,6 +392,7 @@ ${RUN_PREFIX} docker run \
...
@@ -376,6 +392,7 @@ ${RUN_PREFIX} docker run \
${
NIXL_GDS_CAPS
}
\
${
NIXL_GDS_CAPS
}
\
--ipc
host
\
--ipc
host
\
${
PRIVILEGED_STRING
}
\
${
PRIVILEGED_STRING
}
\
${
USER_STRING
}
\
${
NAME_STRING
}
\
${
NAME_STRING
}
\
${
ENTRYPOINT_STRING
}
\
${
ENTRYPOINT_STRING
}
\
${
IMAGE
}
\
${
IMAGE
}
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment