Unverified Commit e8cb972e, authored by Indrajit Bhosale, committed by GitHub
Browse files

chore: Update trtllm version to 1.1.0rc3 (#2930)


Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
parent 241bd014
......@@ -27,4 +27,4 @@ kv_cache_config:
free_gpu_memory_fraction: 0.85
cache_transceiver_config:
backend: default
\ No newline at end of file
backend: DEFAULT
\ No newline at end of file
......@@ -14,24 +14,6 @@ limitations under the License.
# Multimodal Support
> [!Important]
> There are some known issues in tensorrt_llm==1.0.0rc6 version for multimodal support
> It is important to rebuild the dynamo container with a specific version of tensorrt_llm
> commit to use multimodal feature.
## Build Container
```bash
./container/build.sh --framework trtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
```
## Run Container
```bash
./container/run.sh --framework trtllm -it
```
## Usage Guide
TRTLLM supports multimodal models with dynamo. You can provide multimodal inputs in the following ways:
- By sending image URLs
......
......@@ -8,7 +8,6 @@ import signal
import sys
import uvloop
from tensorrt_llm import SamplingParams
from tensorrt_llm.llmapi import (
BuildConfig,
CapacitySchedulerPolicy,
......@@ -16,6 +15,7 @@ from tensorrt_llm.llmapi import (
KvCacheConfig,
SchedulerConfig,
)
from tensorrt_llm.llmapi.llm import SamplingParams
from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_options
from tensorrt_llm.llmapi.tokenizer import tokenizer_factory
from torch.cuda import device_count
......
......@@ -21,8 +21,8 @@ from enum import Enum
from typing import Optional, Union
import torch
from tensorrt_llm import SamplingParams
from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams
from tensorrt_llm.llmapi.llm import SamplingParams
from dynamo.logits_processing.examples import HelloWorldLogitsProcessor
from dynamo.nixl_connect import Connector
......
......@@ -140,7 +140,6 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
pip uninstall -y tensorrt && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
......@@ -148,9 +147,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
if [ -n "$WHEEL_FILE" ]; then \
pip install "$WHEEL_FILE"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
else \
echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \
......@@ -158,9 +154,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
fi
# Install test dependencies
......@@ -477,12 +470,7 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
# is also specified. So set the configurable index as a --extra-index-url for prioritization.
# NOTE: locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6. This
# can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
# we upgrade to a published pip wheel containing this change.
RUN python3 -m pip install --no-cache-dir --break-system-packages "cuda-python>=12,<13" && \
python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
RUN python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
python3 -m pip install --no-cache-dir --break-system-packages \
/workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/workspace/wheelhouse/ai_dynamo*any.whl \
......
......@@ -89,7 +89,7 @@ TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit.
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="a16ba6445c61ed70e7aadfe787d6f316bb422652"
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e81c50dbd2811ec858eccc2c71b5e7a330ff7e24"
TRTLLM_COMMIT=""
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
TRTLLM_GIT_URL=""
......@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.0.0rc6"
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc3"
TENSORRTLLM_PIP_WHEEL=""
......
......@@ -67,7 +67,7 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
| **Build Dependency** | **Version** |
| :------------------- | :------------------------------------------------------------------------------- |
| **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) |
| **TensorRT-LLM** | 1.0.0rc6 |
| **TensorRT-LLM** | 1.1.0rc3 |
| **NIXL** | 0.4.1 |
| **vLLM** | 0.10.1.1 |
| **SGLang** | 0.5.0rc2 |
......
......@@ -48,8 +48,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies]
trtllm =[
"uvloop",
"tensorrt-llm==1.0.0rc6",
"triton==3.3.1", # locking triton as version 3.4.0 breaks tensorrt-llm 1.0.0rc6
"tensorrt-llm==1.1.0rc3",
]
vllm = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment