"openmmapi/vscode:/vscode.git/clone" did not exist on "c7aa1d009146f2592ab56c13293235f2a8403e1b"
Unverified Commit e8cb972e authored by Indrajit Bhosale, committed by GitHub
Browse files

chore: Update trtllm version to 1.1.0rc3 (#2930)


Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
parent 241bd014
...@@ -27,4 +27,4 @@ kv_cache_config: ...@@ -27,4 +27,4 @@ kv_cache_config:
free_gpu_memory_fraction: 0.85 free_gpu_memory_fraction: 0.85
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
\ No newline at end of file \ No newline at end of file
...@@ -14,24 +14,6 @@ limitations under the License. ...@@ -14,24 +14,6 @@ limitations under the License.
# Multimodal Support # Multimodal Support
> [!Important]
> There are some known issues in tensorrt_llm==1.0.0rc6 version for multimodal support
> It is important to rebuild the dynamo container with a specific version of tensorrt_llm
> commit to use multimodal feature.
## Build Container
```bash
./container/build.sh --framework trtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
```
## Run Container
```bash
./container/run.sh --framework trtllm -it
```
## Usage Guide
TRTLLM supports multimodal models with dynamo. You can provide multimodal inputs in the following ways: TRTLLM supports multimodal models with dynamo. You can provide multimodal inputs in the following ways:
- By sending image URLs - By sending image URLs
......
...@@ -8,7 +8,6 @@ import signal ...@@ -8,7 +8,6 @@ import signal
import sys import sys
import uvloop import uvloop
from tensorrt_llm import SamplingParams
from tensorrt_llm.llmapi import ( from tensorrt_llm.llmapi import (
BuildConfig, BuildConfig,
CapacitySchedulerPolicy, CapacitySchedulerPolicy,
...@@ -16,6 +15,7 @@ from tensorrt_llm.llmapi import ( ...@@ -16,6 +15,7 @@ from tensorrt_llm.llmapi import (
KvCacheConfig, KvCacheConfig,
SchedulerConfig, SchedulerConfig,
) )
from tensorrt_llm.llmapi.llm import SamplingParams
from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_options from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_options
from tensorrt_llm.llmapi.tokenizer import tokenizer_factory from tensorrt_llm.llmapi.tokenizer import tokenizer_factory
from torch.cuda import device_count from torch.cuda import device_count
......
...@@ -21,8 +21,8 @@ from enum import Enum ...@@ -21,8 +21,8 @@ from enum import Enum
from typing import Optional, Union from typing import Optional, Union
import torch import torch
from tensorrt_llm import SamplingParams
from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams
from tensorrt_llm.llmapi.llm import SamplingParams
from dynamo.logits_processing.examples import HelloWorldLogitsProcessor from dynamo.logits_processing.examples import HelloWorldLogitsProcessor
from dynamo.nixl_connect import Connector from dynamo.nixl_connect import Connector
......
...@@ -140,7 +140,6 @@ COPY --from=trtllm_wheel . /trtllm_wheel/ ...@@ -140,7 +140,6 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel # Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch # because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel. # and the TRTLLM wheel.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
pip uninstall -y tensorrt && \ pip uninstall -y tensorrt && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
...@@ -148,9 +147,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ ...@@ -148,9 +147,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \ WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \ if [ -n "$WHEEL_FILE" ]; then \
pip install "$WHEEL_FILE"; \ pip install "$WHEEL_FILE"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
else \ else \
echo "No wheel file found in /trtllm_wheel directory."; \ echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \ exit 1; \
...@@ -158,9 +154,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ ...@@ -158,9 +154,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
else \ else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \ pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
fi fi
# Install test dependencies # Install test dependencies
...@@ -477,12 +470,7 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics ...@@ -477,12 +470,7 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url, # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
# is also specified. So set the configurable index as a --extra-index-url for prioritization. # is also specified. So set the configurable index as a --extra-index-url for prioritization.
# NOTE: locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6 RUN python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6. This
# can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
# we upgrade to a published pip wheel containing this change.
RUN python3 -m pip install --no-cache-dir --break-system-packages "cuda-python>=12,<13" && \
python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
python3 -m pip install --no-cache-dir --break-system-packages \ python3 -m pip install --no-cache-dir --break-system-packages \
/workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \ /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/workspace/wheelhouse/ai_dynamo*any.whl \ /workspace/wheelhouse/ai_dynamo*any.whl \
......
...@@ -89,7 +89,7 @@ TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/" ...@@ -89,7 +89,7 @@ TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
# TensorRT-LLM commit to use for building the trtllm wheel if not provided. # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI # Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit. # variables to learn how to run a pipeline with a specific commit.
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="a16ba6445c61ed70e7aadfe787d6f316bb422652" DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e81c50dbd2811ec858eccc2c71b5e7a330ff7e24"
TRTLLM_COMMIT="" TRTLLM_COMMIT=""
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0" TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
TRTLLM_GIT_URL="" TRTLLM_GIT_URL=""
...@@ -98,7 +98,7 @@ TRTLLM_GIT_URL="" ...@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package. # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package. # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.0.0rc6" DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc3"
TENSORRTLLM_PIP_WHEEL="" TENSORRTLLM_PIP_WHEEL=""
......
...@@ -67,7 +67,7 @@ If you are using a **GPU**, the following GPU models and architectures are suppo ...@@ -67,7 +67,7 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
| **Build Dependency** | **Version** | | **Build Dependency** | **Version** |
| :------------------- | :------------------------------------------------------------------------------- | | :------------------- | :------------------------------------------------------------------------------- |
| **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) | | **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) |
| **TensorRT-LLM** | 1.0.0rc6 | | **TensorRT-LLM** | 1.1.0rc3 |
| **NIXL** | 0.4.1 | | **NIXL** | 0.4.1 |
| **vLLM** | 0.10.1.1 | | **vLLM** | 0.10.1.1 |
| **SGLang** | 0.5.0rc2 | | **SGLang** | 0.5.0rc2 |
......
...@@ -48,8 +48,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git" ...@@ -48,8 +48,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies] [project.optional-dependencies]
trtllm =[ trtllm =[
"uvloop", "uvloop",
"tensorrt-llm==1.0.0rc6", "tensorrt-llm==1.1.0rc3",
"triton==3.3.1", # locking triton as version 3.4.0 breaks tensorrt-llm 1.0.0rc6
] ]
vllm = [ vllm = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment