Unverified commit 909a9a9f, authored by Anant Sharma, committed by GitHub
Browse files

feat: add runtime image for trtllm container build (#1796)

parent fbd1f8df
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch" ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.05-py3" ARG BASE_IMAGE_TAG="25.05-py3"
ARG RELEASE_BUILD ARG RELEASE_BUILD
# Base image and tag consumed by the `runtime` stage
# (FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime).
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -216,6 +218,14 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ ...@@ -216,6 +218,14 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
RUN mkdir /opt/dynamo && \ RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12 uv venv /opt/dynamo/venv --python 3.12
# Export the location of the venv created above.
# NOTE(review): presumably set so the `uv pip install` below targets this venv - confirm.
ENV VIRTUAL_ENV=/opt/dynamo/venv
# Build the NIXL Python wheel from /opt/nixl into /workspace/wheels/nixl
RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
# Install the wheel that was just built
# TODO: Move NIXL wheel install to the wheel_builder stage
RUN uv pip install /workspace/wheels/nixl/*.whl
################################### ###################################
####### WHEEL BUILD STAGE ######### ####### WHEEL BUILD STAGE #########
...@@ -354,3 +364,141 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -354,3 +364,141 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
# FIXME: May want a modification with dynamo banner on entry # FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
####################################
########## Runtime Image ###########
####################################
# Final stage, based on the image selected through the RUNTIME_IMAGE and
# RUNTIME_IMAGE_TAG build args.
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

# Relative paths used later in this stage resolve against /workspace.
WORKDIR /workspace

# These two values are independent of each other, so they share one ENV.
ENV DYNAMO_HOME=/workspace \
    VIRTUAL_ENV=/opt/dynamo/venv
# PATH is a separate instruction because it expands VIRTUAL_ENV, which must
# already be defined by an earlier instruction.
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install apt dependencies (package list kept in alphabetical order).
# openssh-client and openssh-server are needed for OpenMPI.
# The apt package lists are removed in the same layer that downloads them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        openssh-client \
        openssh-server \
        python3-dev && \
    rm -rf /var/lib/apt/lists/*
# Copy all bindings (wheels, lib, include) from dev image
COPY --from=dev /opt/dynamo/bindings /opt/dynamo/bindings
# Copy nats and etcd from build image
COPY --from=build /usr/bin/nats-server /usr/bin/nats-server
COPY --from=build /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Copy UCX from build image as plugin for NIXL
COPY --from=build /usr/local/ucx /usr/local/ucx
# Copy NIXL from build image (required for NIXL plugins)
COPY --from=build /usr/local/nixl /usr/local/nixl
# Copy OpenMPI from build image
COPY --from=build /opt/hpcx/ompi /opt/hpcx/ompi
# An ARG is only visible inside a stage after it has been declared in that
# stage. Without this declaration ${ARCH_ALT} expands to an empty string in
# the COPY below, the source path becomes /usr/lib/-linux-gnu/libnuma.so* and
# libnuma is not copied into the image.
# The value comes from --build-arg or, if one exists, from a default declared
# before the first FROM.
ARG ARCH_ALT
# Copy NUMA library from build image
COPY --from=build /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
# Set up the Python environment: bring in the uv/uvx binaries, create the
# venv at $VIRTUAL_ENV and activate it from ~/.bashrc.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN uv venv "${VIRTUAL_ENV}" --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Common dependencies (the requirements file is bind-mounted, not copied)
# TODO: Remove extra install and use pyproject.toml to define all dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt

# Test dependencies
# TODO: Remove this once we have a functional CI image built on top of the runtime image
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install -r /tmp/requirements.txt
# Copy CUDA toolkit components needed for nvcc, cudafe, cicc etc.
# The four compiler driver binaries go to the same directory, so they are
# copied with a single multi-source COPY.
COPY --from=build \
    /usr/local/cuda/bin/nvcc \
    /usr/local/cuda/bin/cudafe++ \
    /usr/local/cuda/bin/ptxas \
    /usr/local/cuda/bin/fatbinary \
    /usr/local/cuda/bin/
# Headers, the CUDA runtime library and the nvvm directory
COPY --from=build /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=build /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=build /usr/local/cuda/nvvm /usr/local/cuda/nvvm
# Copy pytorch installation from NGC PyTorch.
# Each *_VER build arg is part of the name of the matching .dist-info
# directory, so it has to agree with what is installed in the build image.
# Every version arg is declared right before the COPY instructions that use it.

# Libraries matching /usr/local/lib/lib*
COPY --from=build /usr/local/lib/lib* /usr/local/lib/

# torch, torchgen and functorch
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.5
COPY --from=build /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
COPY --from=build /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
COPY --from=build /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch

# torchvision
ARG TORCHVISION_VER=0.22.0a0
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs

# setuptools
ARG SETUPTOOLS_VER=78.1.1
COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info

# triton (distributed as pytorch_triton)
ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
COPY --from=build /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
COPY --from=build /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info

# jinja2
ARG JINJA2_VER=3.1.6
COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info

# networkx
ARG NETWORKX_VER=3.4.2
COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info

# sympy and mpmath version args (MPMATH_VER is declared but not referenced by any COPY in this section)
ARG SYMPY_VER=1.14.0
ARG MPMATH_VER=1.3.0
COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info

# packaging
ARG PACKAGING_VER=23.2
COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info

# flash_attn, including its compiled extension module
ARG FLASH_ATTN_VER=2.7.3
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
# Setup environment variables: NIXL plugin directory, shared-library search
# path, executable search path and the OpenMPI prefix.
ARG ARCH_ALT
ENV OPAL_PREFIX=/opt/hpcx/ompi
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
# NIXL_PLUGIN_DIR was set by the previous instruction, so it can be reused
# here instead of spelling the plugin path out a second time.
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${PATH}
# Install TensorRT-LLM (same as in build stage)
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

# Collect the NIXL and Dynamo wheels in wheelhouse/ (relative to WORKDIR)
COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/

# Three installs, in order:
#   1. TensorRT-LLM from TENSORRTLLM_INDEX_URL, with pypi.org as extra index
#   2. ai-dynamo, with wheelhouse/ passed as --find-links
#   3. nixl, with wheelhouse/ passed as --find-links
RUN uv pip install \
        --index-url "${TENSORRTLLM_INDEX_URL}" \
        --extra-index-url https://pypi.org/simple \
        "${TENSORRTLLM_PIP_WHEEL}" && \
    uv pip install ai-dynamo --find-links wheelhouse && \
    uv pip install nixl --find-links wheelhouse
# Setup TRTLLM environment variables, same as in dev image
ENV TRTLLM_USE_UCX_KVCACHE=1
# Reuse the env script from the dev image and source it from root's .bashrc
COPY --from=dev /usr/local/bin/set_trtllm_env.sh /usr/local/bin/set_trtllm_env.sh
RUN echo 'source /usr/local/bin/set_trtllm_env.sh' >> /root/.bashrc
# Copy benchmarks, examples and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY tests /workspace/tests
COPY benchmarks /workspace/benchmarks
COPY examples /workspace/examples
# Install the benchmarks directory copied above as a Python package
RUN uv pip install /workspace/benchmarks
# Copy launch banner: drop every line of launch_message.txt that starts with
# "#" followed by whitespace, store the rest in ~/.launch_screen and print it
# from ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Exec-form entrypoint with no default arguments
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment