Unverified commit 9630693e, authored by Dmitry Tokarev and committed by GitHub
Browse files

fix: triton ver lock fix and trtllm env var declaration (#2300)

parent 3a425209
...@@ -150,6 +150,7 @@ COPY --from=trtllm_wheel . /trtllm_wheel/ ...@@ -150,6 +150,7 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel # Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch # because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel. # and the TRTLLM wheel.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
pip uninstall -y tensorrt && \ pip uninstall -y tensorrt && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
...@@ -157,14 +158,19 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ ...@@ -157,14 +158,19 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \ WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \ if [ -n "$WHEEL_FILE" ]; then \
pip install "$WHEEL_FILE"; \ pip install "$WHEEL_FILE"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
else \ else \
echo "No wheel file found in /trtllm_wheel directory."; \ echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \ exit 1; \
fi; \ fi; \
else \ else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" \ pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
"${TENSORRTLLM_PIP_WHEEL}" ; \ if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
fi fi
# Install test dependencies # Install test dependencies
...@@ -347,12 +353,25 @@ CMD [] ...@@ -347,12 +353,25 @@ CMD []
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ARG ARCH_ALT ARG ARCH_ALT
WORKDIR /workspace
ENV DYNAMO_HOME=/workspace ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# NIXL install layout. The library directory name is derived from the
# ARCH_ALT build argument declared earlier in this stage.
ENV NIXL_PREFIX="/opt/nvidia/nvda_nixl"
ENV NIXL_LIB_DIR="${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu"
ENV NIXL_PLUGIN_DIR="${NIXL_LIB_DIR}/plugins"
# Search NIXL, UCX and the OpenMPI tree under /opt/hpcx before whatever
# LD_LIBRARY_PATH the base image already provides.
ENV LD_LIBRARY_PATH="${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}"
# Prepend the OpenMPI, etcd and CUDA nvvm binary directories to PATH.
ENV PATH="/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${PATH}"
ENV OPAL_PREFIX="/opt/hpcx/ompi"
# Install apt dependencies # Install apt dependencies
# openssh-client, openssh-server are needed for OpenMPI # openssh-client, openssh-server are needed for OpenMPI
...@@ -447,21 +466,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/ ...@@ -447,21 +466,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
# Setup environment variables
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi
# Install TensorRT-LLM (same as in build stage) # Install TensorRT-LLM (same as in build stage)
ARG HAS_TRTLLM_CONTEXT=0 ARG HAS_TRTLLM_CONTEXT=0
...@@ -470,16 +474,19 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" ...@@ -470,16 +474,19 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# Copy Dynamo wheels into wheelhouse # Copy Dynamo wheels into wheelhouse
# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel # Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/ COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/ COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url, # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
# is also specified. So set the configurable index as a --extra-index-url for prioritization. # is also specified. So set the configurable index as a --extra-index-url for prioritization.
# Install TensorRT-LLM from the configurable index, pin triton on amd64, then
# install the Dynamo and NIXL wheels from the local wheelhouse.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4.
# Every step is joined with `&&` (including after `fi`) so that a failed
# TensorRT-LLM install or triton pin fails the build instead of being masked by
# the exit status of the final install.
# The triton pin goes through `uv pip`, like the other installs in this step,
# so it lands in the same environment as tensorrt-llm.
# NOTE(review): ARCH must be declared with ARG in this stage for the amd64
# check to match - confirm against the full Dockerfile.
RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
    if [ "$ARCH" = "amd64" ]; then \
        uv pip install "triton==3.3.1"; \
    fi && \
    uv pip install ai-dynamo nixl --find-links /workspace/wheelhouse
# Copy benchmarks, backends and tests for CI # Copy benchmarks, backends and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image # TODO: Remove this once we have a functional CI image built on top of the runtime image
......
...@@ -61,7 +61,8 @@ Repository = "https://github.com/ai-dynamo/dynamo.git" ...@@ -61,7 +61,8 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies] [project.optional-dependencies]
trtllm = [
    "uvloop",
    "tensorrt-llm==1.0.0rc4",
    # Locking triton, as version 3.4.0 breaks tensorrt-llm 1.0.0rc4.
    "triton==3.3.1",
]
vllm = [ vllm = [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment