chore: update trtllm version to v1.3.0rc11 (#8048)

a2bc6d5c · Richard Huo · GitHub · adae833e · a2bc6d5c · a2bc6d5c
Unverified commit a2bc6d5c, authored Apr 10, 2026 by Richard Huo; committed by GitHub on Apr 10, 2026
6 changed files
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = [
    "pandas",
    "pydantic>=2",
    "tabulate",
-    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.3.0rc9 (==4.57.1), SGLang 0.5.8 (==4.57.1)
+    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.3.0rc11 (==4.57.3), SGLang 0.5.8 (==4.57.1)
    "transformers>=4.56.0",
 ]


--- a/container/context.yaml
+++ b/container/context.yaml
@@ -94,8 +94,8 @@ trtllm:
  cuda13.1:
    base_image: nvcr.io/nvidia/pytorch
    runtime_image: nvcr.io/nvidia/cuda-dl-base
-    base_image_tag: 25.12-py3
-    runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
+    base_image_tag: 26.02-py3
+    runtime_image_tag: 26.02-cuda13.1-runtime-ubuntu24.04
  nixl_ref: 0.10.1
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "false"
@@ -103,19 +103,19 @@ trtllm:
  python_version: "3.12"
  index_url: https://pypi.nvidia.com/
  pip_wheel_dir: /tmp/trtllm_wheel/
-  pip_wheel: tensorrt-llm==1.3.0rc9
+  pip_wheel: tensorrt-llm==1.3.0rc11
  trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}

-  github_trtllm_commit: v1.3.0rc9
-  torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
-  torch_tensorrt_version: 2.10.0a0
-  torchvision_version: 0.25.0a0+ca221243
-  torchao_ver: 0.15.0+git01374eb5
+  github_trtllm_commit: v1.3.0rc11
+  torch_version: 2.11.0a0+eb65b36914.nv26.2
+  torch_tensorrt_version: 2.11.0a0
+  torchvision_version: 0.25.0a0+1e53952f.nv26.2.44259020
+  torchao_ver: 0.16.0+gita89eaab2
  torchdata_ver: 0.11.0
-  torchtitan_ver: 0.2.0
+  torchtitan_ver: 0.2.1+git9f211ec1
  jinja2_version: 3.1.6
  sympy_version: 1.14.0
-  pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
-  flash_attn_version: 2.7.4.post1+25.12
-  flashinfer_python_ver: 0.6.1
+  pytorch_triton_ver: 3.6.0+git9844da95.nv26.2
+  flash_attn_version: 2.7.4.post1+nv26.2.44259020
+  flashinfer_python_ver: 0.6.6
  has_trtllm_context: "0"
--- a/container/deps/requirements.common.txt
+++ b/container/deps/requirements.common.txt
@@ -28,7 +28,7 @@ tensorboard>=2.19.0,<2.21.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds
 # - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
-# - TensorRT-LLM 1.3.0rc9: ==4.57.1
+# - TensorRT-LLM 1.3.0rc11: ==4.57.3
 # - SGLang 0.5.8: ==4.57.1
 # Using >=4.56.0 to satisfy all frameworks
 transformers>=4.56.0

--- a/container/templates/trtllm_framework.Dockerfile
+++ b/container/templates/trtllm_framework.Dockerfile
@@ -133,7 +133,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
        # Install from local wheel directory in build context
        WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
        if [ -n "$WHEEL_FILE" ]; then \
-            uv pip install "$WHEEL_FILE" triton==3.5.1; \
+            uv pip install "$WHEEL_FILE"; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
@@ -141,19 +141,18 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    elif [ -n "$(find /trtllm_wheel_image -name "*.whl" | head -n 1)" ]; then \
        # Install from wheel embedded in the TRTLLM release image
        WHEEL_FILE="$(find /trtllm_wheel_image -name "*.whl" | head -n 1)"; \
-        uv pip install "$WHEEL_FILE" triton==3.5.1; \
+        uv pip install "$WHEEL_FILE"; \
    else \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        # TRTLLM 1.2.0rc6.post2 has issues installing from pypi with uv, installing from direct wheel link works best
-        # explicitly installing triton 3.5.1 as trtllm only lists triton as dependency on x64_64 for some reason
        if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
            TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
            PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
            ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64"); \
            DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
-            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
+            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}"; \
        else \
-            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
+            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
        fi; \
    fi && \
    # Run TensorRT installer that ships with the TRTLLM wheel

--- a/docs/reference/support-matrix.md
+++ b/docs/reference/support-matrix.md
@@ -29,7 +29,7 @@ The following table shows the backend framework versions included with each Dyna

 | **Dynamo** | **SGLang** | **TensorRT-LLM** | **vLLM** | **NIXL** |
 | :--- | :--- | :--- | :--- | :--- |
-| **main (ToT)** | `0.5.9` | `1.3.0rc9` | `0.19.0` | `0.10.1` |
+| **main (ToT)** | `0.5.9` | `1.3.0rc11` | `0.19.0` | `0.10.1` |
 | **v1.1.0-dev.1** *(experimental)* | `0.5.9` | `1.3.0rc5.post1` | `0.17.1` | `0.10.1` |
 | **v1.0.1** | `0.5.9` | `1.3.0rc5.post1` | `0.16.0` | `0.10.1` |
 | **v1.0.0** | `0.5.9` | `1.3.0rc5.post1` | `0.16.0` | `0.10.1` |

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
 [project.optional-dependencies]
 trtllm =[
    "uvloop",
-    "tensorrt-llm==1.3.0rc9",
+    "tensorrt-llm==1.3.0rc11",
 ]

 vllm = [
@@ -210,6 +210,8 @@ filterwarnings = [
    "ignore:Triton is not supported on current platform.*:UserWarning",
    # torch.jit.script_method deprecation from torch.utils.mkldnn
    "ignore:.*torch\\.jit\\.script_method.*is deprecated.*:DeprecationWarning",
+    # torch.jit.script deprecation from modelopt.torch.quantization
+    "ignore:`torch.jit.script` is deprecated:DeprecationWarning",
    # nvidia-modelopt warning about transformers version (transitive dep from TRT-LLM)
    "ignore:transformers version .* is incompatible with nvidia-modelopt.*:UserWarning",
    # SGLang quantization warnings on CPU-only runners