Unverified Commit 9630693e authored by Dmitry Tokarev's avatar Dmitry Tokarev Committed by GitHub
Browse files

fix: triton ver lock fix and trtllm env var declaration (#2300)

parent 3a425209
......@@ -150,6 +150,7 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
pip uninstall -y tensorrt && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
......@@ -157,14 +158,19 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \
pip install "$WHEEL_FILE"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
else \
echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \
fi; \
else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" \
"${TENSORRTLLM_PIP_WHEEL}" ; \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
fi
# Install test dependencies
......@@ -347,12 +353,25 @@ CMD []
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ARG ARCH_ALT
WORKDIR /workspace
# Workspace root and virtualenv location.
ENV DYNAMO_HOME=/workspace \
    VIRTUAL_ENV=/opt/dynamo/venv
# Virtualenv executables take precedence over the inherited PATH.
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# NIXL layout; the library directory is keyed on the ARCH_ALT build argument.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR="${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu"
ENV NIXL_PLUGIN_DIR="${NIXL_LIB_DIR}/plugins"

# Library search path: NIXL, NIXL plugins, UCX and /opt/hpcx/ompi ahead of the inherited value.
ENV LD_LIBRARY_PATH="${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}"

# Extra tool directories (/opt/hpcx/ompi, etcd, CUDA NVVM) ahead of the PATH set above.
ENV PATH="/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${PATH}"
ENV OPAL_PREFIX=/opt/hpcx/ompi
# Install apt dependencies
# openssh-client, openssh-server are needed for OpenMPI
......@@ -447,21 +466,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
# Setup environment variables
# ARCH_ALT is a build argument; it selects the architecture-specific NIXL library directory below.
ARG ARCH_ALT
# NIXL install prefix, its per-architecture library directory, and the plugin directory beneath it.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Prepend the NIXL, UCX and /opt/hpcx/ompi library directories to the existing LD_LIBRARY_PATH.
# The continuation lines carry no leading whitespace so the value stays a single colon-separated string.
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
# Prepend the /opt/hpcx/ompi, etcd and CUDA NVVM binary directories to the existing PATH.
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Points at the same /opt/hpcx/ompi tree used above (presumably read by Open MPI at run time — confirm).
ENV OPAL_PREFIX=/opt/hpcx/ompi
# Install TensorRT-LLM (same as in build stage)
ARG HAS_TRTLLM_CONTEXT=0
......@@ -470,16 +474,19 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# Copy Dynamo wheels into wheelhouse:
# the nixl wheels come from `dev`, the remaining wheels from `wheel_builder`.
# Copy the metrics binary from `dev` (target/release); it is not part of the ai-dynamo wheel.
COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
# is also specified. So set the configurable index as a --extra-index-url for prioritization.
# Install TensorRT-LLM first, then ai-dynamo and nixl resolved against the local wheelhouse.
RUN uv pip install \
        --extra-index-url "${TENSORRTLLM_INDEX_URL}" \
        "${TENSORRTLLM_PIP_WHEEL}" \
    && uv pip install ai-dynamo nixl --find-links wheelhouse
# Install TensorRT-LLM, pin triton, then install ai-dynamo and nixl from the local wheelhouse.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4 (pin applied on amd64 only).
# Every step is chained with '&&' so a failed install fails the build; a bare ';'
# after 'fi' would let the final install run anyway and mask an earlier failure.
# NOTE(review): $ARCH is only populated if an `ARG ARCH` is declared in this stage — confirm.
# NOTE(review): the pin uses plain `pip` while the other installs use `uv pip`;
# confirm both target the same Python environment.
RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
    if [ "$ARCH" = "amd64" ]; then \
        pip install "triton==3.3.1"; \
    fi && \
    uv pip install ai-dynamo nixl --find-links /workspace/wheelhouse
# Copy benchmarks, backends and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image
......
......@@ -61,7 +61,8 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies]
# Optional dependencies for the `trtllm` extra.
# Each element must be comma-separated and listed once for the array to parse as valid TOML.
trtllm = [
    "uvloop",
    "tensorrt-llm==1.0.0rc4",
    "triton==3.3.1", # locking triton as version 3.4.0 breaks tensorrt-llm 1.0.0rc4
]
vllm = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment