fix: triton ver lock fix and trtllm env var declaration (#2300)

9630693e · Dmitry Tokarev · GitHub · 3a425209 · 9630693e · 9630693e
Unverified Commit 9630693e authored Aug 05, 2025 by Dmitry Tokarev Committed by GitHub Aug 05, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 33 additions and 25 deletions

container/Dockerfile.tensorrt_llm container/Dockerfile.tensorrt_llm +31 -24

pyproject.toml pyproject.toml +2 -1

No files found.
--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.tensorrt_llm
@@ -150,6 +150,7 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
 # Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
 # because there might be mismatched versions of TensorRT between the NGC PyTorch
 # and the TRTLLM wheel.
+# Lock triton to 3.3.1 on amd64 (3.4.0 breaks tensorrt-llm 1.0.0rc4); installs are &&-chained so a failed TRT-LLM install fails the build.
 RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    pip uninstall -y tensorrt && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
@@ -157,14 +158,19 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
        if [ -n "$WHEEL_FILE" ]; then \
-            pip install "$WHEEL_FILE"; \
+            pip install "$WHEEL_FILE" && \
+            if [ "$ARCH" = "amd64" ]; then \
+                pip install "triton==3.3.1"; \
+            fi; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
    else \
-         # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
+        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
-         pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" \
+        pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
-         "${TENSORRTLLM_PIP_WHEEL}" ; \
+        if [ "$ARCH" = "amd64" ]; then \
+            pip install "triton==3.3.1"; \
+        fi; \
    fi
 # Install test dependencies
@@ -347,12 +353,25 @@ CMD []
 FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
+WORKDIR /workspace
 ARG ARCH_ALT
-WORKDIR /workspace
 ENV DYNAMO_HOME=/workspace
 ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+# Setup environment variables: NIXL, UCX and HPC-X OpenMPI paths.
+# NIXL_LIB_DIR is derived from ARCH_ALT, so these lines must stay below the ARG ARCH_ALT declaration.
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
+ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
+ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins
+ENV LD_LIBRARY_PATH=\
+${NIXL_LIB_DIR}:\
+${NIXL_PLUGIN_DIR}:\
+/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+/opt/hpcx/ompi/lib:\
+${LD_LIBRARY_PATH}
+ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${PATH}
+ENV OPAL_PREFIX=/opt/hpcx/ompi
 # Install apt dependencies
 # openssh-client, openssh-server are needed for OpenMPI
@@ -447,21 +466,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
-# Setup environment variables
-ARG ARCH_ALT
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-ENV LD_LIBRARY_PATH=\
-$NIXL_LIB_DIR:\
-$NIXL_PLUGIN_DIR:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-/opt/hpcx/ompi/lib:\
-$LD_LIBRARY_PATH
-ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
-ENV OPAL_PREFIX=/opt/hpcx/ompi
 # Install TensorRT-LLM (same as in build stage)
 ARG HAS_TRTLLM_CONTEXT=0
@@ -470,16 +474,19 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
 # Copy Dynamo wheels into wheelhouse
 # Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
-COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
+COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
-COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
+COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
 COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
 # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
 # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
 # is also specified. So set the configurable index as a --extra-index-url for prioritization.
-RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" \
+# Lock triton to 3.3.1 on amd64 (3.4.0 breaks tensorrt-llm 1.0.0rc4); pin via uv like the other installs here, and &&-chain every step so any failed install fails the build.
-    "${TENSORRTLLM_PIP_WHEEL}" && \
+RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
-    uv pip install ai-dynamo nixl --find-links wheelhouse
+    if [ "$ARCH" = "amd64" ]; then \
+        uv pip install "triton==3.3.1"; \
+    fi && \
+    uv pip install ai-dynamo nixl --find-links /workspace/wheelhouse
 # Copy benchmarks, backends and tests for CI
 # TODO: Remove this once we have a functional CI image built on top of the runtime image

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,8 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
 [project.optional-dependencies]
 trtllm =[
    "uvloop",
-    "tensorrt-llm==1.0.0rc4"
+    "tensorrt-llm==1.0.0rc4",
+    "triton==3.3.1; platform_machine == 'x86_64'",  # 3.4.0 breaks tensorrt-llm 1.0.0rc4; x86_64-only to match the amd64-only pin in container/Dockerfile.tensorrt_llm
 ]
 vllm = [