Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c265cd67
"docs/vscode:/vscode.git/clone" did not exist on "7717d0838b2b74313184a462bb6e08711a5950c6"
Unverified
Commit
c265cd67
authored
Sep 30, 2025
by
Tanmay Verma
Committed by
GitHub
Sep 30, 2025
Browse files
fix: Install right versions of the dependencies for tensorrtllm (#3218)
parent
f0041782
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
2 deletions
+37
-2
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
...trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
+9
-1
container/Dockerfile.trtllm
container/Dockerfile.trtllm
+27
-1
container/build.sh
container/build.sh
+1
-0
No files found.
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
c265cd67
...
...
@@ -280,7 +280,8 @@ class HandlerBase:
)
yield
final_out
if
not
res
.
outputs
:
# If we are not done generating, but there are no outputs, return an error
if
not
res
.
outputs
and
not
res
.
finished
:
yield
{
"finish_reason"
:
"error"
,
"token_ids"
:
[]}
break
...
...
@@ -303,6 +304,13 @@ class HandlerBase:
out
[
"disaggregated_params"
]
=
asdict
(
DisaggregatedParamsCodec
.
encode
(
output
.
disaggregated_params
)
)
if
res
.
finished
and
not
out
.
get
(
"finish_reason"
):
out
[
"finish_reason"
]
=
"unknown"
logging
.
warning
(
"Request finished with no finish reason set - this indicates a possible bug"
)
# Yield the chunk to the client and update the token count for the next iteration.
yield
out
num_output_tokens_so_far
=
next_total_toks
container/Dockerfile.trtllm
View file @
c265cd67
...
...
@@ -12,6 +12,7 @@ ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
ARG GITHUB_TRTLLM_COMMIT
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
...
@@ -96,7 +97,8 @@ RUN apt-get update && \
# System utilities
ca-certificates \
curl \
jq && \
jq \
wget && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
...
...
@@ -176,6 +178,9 @@ COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
ENV ENV=${ENV:-/etc/shinit_v2}
# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
ARG TENSORRTLLM_PIP_WHEEL
...
...
@@ -187,7 +192,18 @@ COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
# Clean up any existing conflicting CUDA repository configurations and GPG keys
rm -f /etc/apt/sources.list.d/cuda*.list && \
rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
rm -f /etc/apt/trusted.gpg.d/cuda*.gpg && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
# Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
TRTLLM_COMMIT=$(cat /trtllm_wheel/commit.txt | awk -F'_' '{print $2}') && \
(curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" || \
curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
# Modify the script to use virtual environment pip instead of system pip3
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
bash /tmp/install_tensorrt.sh && \
# Install from local wheel directory in build context
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \
...
...
@@ -197,10 +213,20 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
exit 1; \
fi; \
else \
# Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \
(curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
# Modify the script to use virtual environment pip instead of system pip3
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
bash /tmp/install_tensorrt.sh && \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
fi
ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
...
...
container/build.sh
View file @
c265cd67
...
...
@@ -635,6 +635,7 @@ check_wheel_file() {
}
if
[[
$FRAMEWORK
==
"TRTLLM"
]]
;
then
BUILD_ARGS+
=
" --build-arg GITHUB_TRTLLM_COMMIT=
${
TRTLLM_COMMIT
}
"
if
[
"
$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT
"
=
true
]
;
then
if
[
-n
"
$TRTLLM_COMMIT
"
]
||
[
-n
"
$TENSORRTLLM_PIP_WHEEL
"
]
;
then
echo
"ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment