Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c265cd67
Unverified
Commit
c265cd67
authored
Sep 30, 2025
by
Tanmay Verma
Committed by
GitHub
Sep 30, 2025
Browse files
fix: Install right versions of the dependencies for tensorrtllm (#3218)
parent
f0041782
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
2 deletions
+37
-2
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
...trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
+9
-1
container/Dockerfile.trtllm
container/Dockerfile.trtllm
+27
-1
container/build.sh
container/build.sh
+1
-0
No files found.
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
c265cd67
...
@@ -280,7 +280,8 @@ class HandlerBase:
...
@@ -280,7 +280,8 @@ class HandlerBase:
)
)
yield
final_out
yield
final_out
if
not
res
.
outputs
:
# If we are not done generating, but there are no outputs, return an error
if
not
res
.
outputs
and
not
res
.
finished
:
yield
{
"finish_reason"
:
"error"
,
"token_ids"
:
[]}
yield
{
"finish_reason"
:
"error"
,
"token_ids"
:
[]}
break
break
...
@@ -303,6 +304,13 @@ class HandlerBase:
...
@@ -303,6 +304,13 @@ class HandlerBase:
out
[
"disaggregated_params"
]
=
asdict
(
out
[
"disaggregated_params"
]
=
asdict
(
DisaggregatedParamsCodec
.
encode
(
output
.
disaggregated_params
)
DisaggregatedParamsCodec
.
encode
(
output
.
disaggregated_params
)
)
)
if
res
.
finished
and
not
out
.
get
(
"finish_reason"
):
out
[
"finish_reason"
]
=
"unknown"
logging
.
warning
(
"Request finished with no finish reason set - this indicates a possible bug"
)
# Yield the chunk to the client and update the token count for the next iteration.
# Yield the chunk to the client and update the token count for the next iteration.
yield
out
yield
out
num_output_tokens_so_far
=
next_total_toks
num_output_tokens_so_far
=
next_total_toks
container/Dockerfile.trtllm
View file @
c265cd67
...
@@ -12,6 +12,7 @@ ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
...
@@ -12,6 +12,7 @@ ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
ARG HAS_TRTLLM_CONTEXT=0
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
ARG GITHUB_TRTLLM_COMMIT
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
@@ -96,7 +97,8 @@ RUN apt-get update && \
...
@@ -96,7 +97,8 @@ RUN apt-get update && \
# System utilities
# System utilities
ca-certificates \
ca-certificates \
curl \
curl \
jq && \
jq \
wget && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
...
@@ -176,6 +178,9 @@ COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash
...
@@ -176,6 +178,9 @@ COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
ENV ENV=${ENV:-/etc/shinit_v2}
# Install TensorRT-LLM and related dependencies
# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
ARG HAS_TRTLLM_CONTEXT
ARG TENSORRTLLM_PIP_WHEEL
ARG TENSORRTLLM_PIP_WHEEL
...
@@ -187,7 +192,18 @@ COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
...
@@ -187,7 +192,18 @@ COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
# and the TRTLLM wheel.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
# Clean up any existing conflicting CUDA repository configurations and GPG keys
rm -f /etc/apt/sources.list.d/cuda*.list && \
rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
rm -f /etc/apt/trusted.gpg.d/cuda*.gpg && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
# Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
TRTLLM_COMMIT=$(cat /trtllm_wheel/commit.txt | awk -F'_' '{print $2}') && \
(curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" || \
curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
# Modify the script to use virtual environment pip instead of system pip3
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
bash /tmp/install_tensorrt.sh && \
# Install from local wheel directory in build context
# Install from local wheel directory in build context
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \
if [ -n "$WHEEL_FILE" ]; then \
...
@@ -197,10 +213,20 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
...
@@ -197,10 +213,20 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
exit 1; \
exit 1; \
fi; \
fi; \
else \
else \
# Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \
(curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
# Modify the script to use virtual environment pip instead of system pip3
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
bash /tmp/install_tensorrt.sh && \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
fi
fi
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
# Install dynamo, NIXL, and dynamo-specific dependencies
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
...
...
container/build.sh
View file @
c265cd67
...
@@ -635,6 +635,7 @@ check_wheel_file() {
...
@@ -635,6 +635,7 @@ check_wheel_file() {
}
}
if
[[
$FRAMEWORK
==
"TRTLLM"
]]
;
then
if
[[
$FRAMEWORK
==
"TRTLLM"
]]
;
then
BUILD_ARGS+
=
" --build-arg GITHUB_TRTLLM_COMMIT=
${
TRTLLM_COMMIT
}
"
if
[
"
$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT
"
=
true
]
;
then
if
[
"
$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT
"
=
true
]
;
then
if
[
-n
"
$TRTLLM_COMMIT
"
]
||
[
-n
"
$TENSORRTLLM_PIP_WHEEL
"
]
;
then
if
[
-n
"
$TRTLLM_COMMIT
"
]
||
[
-n
"
$TENSORRTLLM_PIP_WHEEL
"
]
;
then
echo
"ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
echo
"ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment