"docs/vscode:/vscode.git/clone" did not exist on "9f76d0606c95778fb084b5ea59b1eb0f30dc650e"
Unverified commit a2bc6d5c, authored by Richard Huo, committed by GitHub
Browse files

chore: update trtllm version to v1.3.0rc11 (#8048)

parent adae833e
......@@ -48,7 +48,7 @@ dependencies = [
"pandas",
"pydantic>=2",
"tabulate",
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.3.0rc9 (==4.57.1), SGLang 0.5.8 (==4.57.1)
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.3.0rc11 (==4.57.3), SGLang 0.5.8 (==4.57.1)
"transformers>=4.56.0",
]
......
......@@ -94,8 +94,8 @@ trtllm:
cuda13.1:
base_image: nvcr.io/nvidia/pytorch
runtime_image: nvcr.io/nvidia/cuda-dl-base
base_image_tag: 25.12-py3
runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
base_image_tag: 26.02-py3
runtime_image_tag: 26.02-cuda13.1-runtime-ubuntu24.04
nixl_ref: 0.10.1
enable_media_ffmpeg: "false"
enable_gpu_memory_service: "false"
......@@ -103,19 +103,19 @@ trtllm:
python_version: "3.12"
index_url: https://pypi.nvidia.com/
pip_wheel_dir: /tmp/trtllm_wheel/
pip_wheel: tensorrt-llm==1.3.0rc9
pip_wheel: tensorrt-llm==1.3.0rc11
trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
github_trtllm_commit: v1.3.0rc9
torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
torch_tensorrt_version: 2.10.0a0
torchvision_version: 0.25.0a0+ca221243
torchao_ver: 0.15.0+git01374eb5
github_trtllm_commit: v1.3.0rc11
torch_version: 2.11.0a0+eb65b36914.nv26.2
torch_tensorrt_version: 2.11.0a0
torchvision_version: 0.25.0a0+1e53952f.nv26.2.44259020
torchao_ver: 0.16.0+gita89eaab2
torchdata_ver: 0.11.0
torchtitan_ver: 0.2.0
torchtitan_ver: 0.2.1+git9f211ec1
jinja2_version: 3.1.6
sympy_version: 1.14.0
pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
flash_attn_version: 2.7.4.post1+25.12
flashinfer_python_ver: 0.6.1
pytorch_triton_ver: 3.6.0+git9844da95.nv26.2
flash_attn_version: 2.7.4.post1+nv26.2.44259020
flashinfer_python_ver: 0.6.6
has_trtllm_context: "0"
......@@ -28,7 +28,7 @@ tensorboard>=2.19.0,<2.21.0
tensorboardX==2.6.2.2
# Transformers version constraint for container builds
# - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
# - TensorRT-LLM 1.3.0rc9: ==4.57.1
# - TensorRT-LLM 1.3.0rc11: ==4.57.3
# - SGLang 0.5.8: ==4.57.1
# Using >=4.56.0 to satisfy all frameworks
transformers>=4.56.0
......
......@@ -133,7 +133,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Install from local wheel directory in build context
WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
if [ -n "$WHEEL_FILE" ]; then \
uv pip install "$WHEEL_FILE" triton==3.5.1; \
uv pip install "$WHEEL_FILE"; \
else \
echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \
......@@ -141,19 +141,18 @@ RUN --mount=type=cache,target=/root/.cache/uv \
elif [ -n "$(find /trtllm_wheel_image -name "*.whl" | head -n 1)" ]; then \
# Install from wheel embedded in the TRTLLM release image
WHEEL_FILE="$(find /trtllm_wheel_image -name "*.whl" | head -n 1)"; \
uv pip install "$WHEEL_FILE" triton==3.5.1; \
uv pip install "$WHEEL_FILE"; \
else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
# TRTLLM 1.2.0rc6.post2 has issues installing from pypi with uv, installing from direct wheel link works best
# explicitly installing triton 3.5.1 as trtllm only lists triton as a dependency on x86_64 for some reason
if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64"); \
DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}"; \
else \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
fi; \
fi && \
# Run TensorRT installer that ships with the TRTLLM wheel
......
......@@ -29,7 +29,7 @@ The following table shows the backend framework versions included with each Dyna
| **Dynamo** | **SGLang** | **TensorRT-LLM** | **vLLM** | **NIXL** |
| :--- | :--- | :--- | :--- | :--- |
| **main (ToT)** | `0.5.9` | `1.3.0rc9` | `0.19.0` | `0.10.1` |
| **main (ToT)** | `0.5.9` | `1.3.0rc11` | `0.19.0` | `0.10.1` |
| **v1.1.0-dev.1** *(experimental)* | `0.5.9` | `1.3.0rc5.post1` | `0.17.1` | `0.10.1` |
| **v1.0.1** | `0.5.9` | `1.3.0rc5.post1` | `0.16.0` | `0.10.1` |
| **v1.0.0** | `0.5.9` | `1.3.0rc5.post1` | `0.16.0` | `0.10.1` |
......
......@@ -44,7 +44,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies]
trtllm =[
"uvloop",
"tensorrt-llm==1.3.0rc9",
"tensorrt-llm==1.3.0rc11",
]
vllm = [
......@@ -210,6 +210,8 @@ filterwarnings = [
"ignore:Triton is not supported on current platform.*:UserWarning",
# torch.jit.script_method deprecation from torch.utils.mkldnn
"ignore:.*torch\\.jit\\.script_method.*is deprecated.*:DeprecationWarning",
# torch.jit.script deprecation from modelopt.torch.quantization
"ignore:`torch.jit.script` is deprecated:DeprecationWarning",
# nvidia-modelopt warning about transformers version (transitive dep from TRT-LLM)
"ignore:transformers version .* is incompatible with nvidia-modelopt.*:UserWarning",
# SGLang quantization warnings on CPU-only runners
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment