chore: Upgrade to Tensorrt-LLM 1.3.0rc1 (#5700)

Co-authored-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>

chore: Upgrade to Tensorrt-LLM 1.3.0rc1 (#5700)
Co-authored-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
ba711cc1 · Tanmay Verma · GitHub · 9e2a2cc9 · ba711cc1 · ba711cc1
Unverified commit ba711cc1, authored Jan 29, 2026 by Tanmay Verma; committed by GitHub on Jan 29, 2026.
8 changed files
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
    "pydantic>=2",
    "tabulate",
    "types-tabulate",
-    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc6.post2 (==4.57.1), SGLang 0.5.8 (==4.57.1)
+    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.3.0rc1 (==4.57.1), SGLang 0.5.8 (==4.57.1)
    "transformers>=4.56.0",
    "pytest-mypy",
 ]

--- a/components/src/dynamo/trtllm/publisher.py
+++ b/components/src/dynamo/trtllm/publisher.py
@@ -37,6 +37,16 @@ from dynamo.llm import KvEventPublisher, WorkerMetricsPublisher
 logging.basicConfig(level=logging.DEBUG)
+# Poll the engine with zero or near-zero RPC timeouts; bound the polling overhead with capped backoff sleeps.
+_STATS_TIMEOUT_SEC = 0.01
+_KV_EVENTS_TIMEOUT_SEC = 0.0
+_PUBLISH_MIN_SLEEP_SEC = 0.01
+_PUBLISH_MAX_SLEEP_SEC = 0.1
+_PUBLISH_BACKOFF_FACTOR = 2.0
+_KV_EVENTS_MIN_SLEEP_SEC = 0.005
+_KV_EVENTS_MAX_SLEEP_SEC = 0.02
+_KV_EVENTS_BACKOFF_FACTOR = 1.5
 def _to_signed_i64(value: int | None) -> int | None:
    """Convert a Python int to signed 64-bit range by two's complement."""
@@ -381,6 +391,32 @@ class Publisher:
            name="publish_kv_cache_events_thread",
        )
+    async def _polling_loop(
+        self,
+        fetch_fn,
+        handler_fn,
+        min_sleep: float,
+        max_sleep: float,
+        backoff_factor: float,
+    ):
+        sleep_s = min_sleep
+        while not self._stop_event.is_set():
+            had_data = False
+            try:
+                async for item in fetch_fn():
+                    had_data = True
+                    handler_fn(item)
+            except (asyncio.TimeoutError, TimeoutError, asyncio.QueueEmpty):
+                pass
+            except Exception as e:
+                logging.warning(f"Publisher polling loop error: {e}", exc_info=True)
+            if not had_data:
+                await asyncio.sleep(sleep_s)
+                sleep_s = min(max_sleep, sleep_s * backoff_factor)
+            else:
+                sleep_s = min_sleep
    async def _publish_stats_task(self):
        """
        Publish stats to the metrics publisher.
@@ -393,15 +429,19 @@ class Publisher:
            logging.error("KV metrics publisher not initialized!")
            return False
-        stats = self.engine.llm.get_stats_async(timeout=5)
+        def handle_stat(stat):
-        async for stat in stats:
            kv_active_blocks = stat["kvCacheStats"]["usedNumBlocks"]
            logging.debug(f"Publishing stats: kv_active_blocks: {kv_active_blocks}")
            # TRT-LLM doesn't use data parallelism currently (dp_rank=None)
            self.metrics_publisher.publish(None, kv_active_blocks)
+        await self._polling_loop(
+            lambda: self.engine.llm.get_stats_async(timeout=_STATS_TIMEOUT_SEC),
+            handle_stat,
+            _PUBLISH_MIN_SLEEP_SEC,
+            _PUBLISH_MAX_SLEEP_SEC,
+            _PUBLISH_BACKOFF_FACTOR,
+        )
        return True
    async def _publish_kv_cache_events_task(self):
@@ -418,12 +458,22 @@ class Publisher:
            logging.error("No KV event publisher initialized (neither NATS nor ZMQ)!")
            return
-        events = self.engine.llm.get_kv_cache_events_async(timeout=5)
+        await self._polling_loop(
-        async for event in events:
+            lambda: self.engine.llm.get_kv_cache_events_async(
+                timeout=_KV_EVENTS_TIMEOUT_SEC
+            ),
+            self._handle_kv_event,
+            _KV_EVENTS_MIN_SLEEP_SEC,
+            _KV_EVENTS_MAX_SLEEP_SEC,
+            _KV_EVENTS_BACKOFF_FACTOR,
+        )
+        return True
+    def _handle_kv_event(self, event):
        logging.debug(f"KV cache event received: {event}")
        # drop the events that is not emitted from the global attention layer.
        if self.should_drop_event(event):
-                continue
+            return
        event_id = event["event_id"]
        data = event["data"]
@@ -512,14 +562,10 @@ class Publisher:
                )
            elif self.kv_event_publisher:
                # No consolidator: publish to NATS (router subscribes directly)
-                    self.kv_event_publisher.publish_removed(
+                self.kv_event_publisher.publish_removed(event_id, removed_block_hashes)
-                        event_id, removed_block_hashes
-                    )
        elif data["type"] == "created" and self.processing_initial_created_events:
            self.update_max_window_size(event)
-        return True
    def start(self):
        if (
            self.publish_kv_cache_events_thread

--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -42,15 +42,16 @@ ARG ENABLE_MEDIA_FFMPEG
 ARG CARGO_BUILD_JOBS
 ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
-ARG PYTORCH_BASE_IMAGE_TAG="25.10-py3"
+ARG PYTORCH_BASE_IMAGE_TAG="25.12-py3"
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-ARG RUNTIME_IMAGE_TAG="25.10-cuda13.0-runtime-ubuntu24.04"
+ARG RUNTIME_IMAGE_TAG="25.12-cuda13.1-runtime-ubuntu24.04"
 # TensorRT-LLM specific configuration
 ARG HAS_TRTLLM_CONTEXT=0
 ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
 ARG TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
 ARG GITHUB_TRTLLM_COMMIT
+ARG TRTLLM_WHEEL_IMAGE="trtllm_wheel_image_empty"
 # SCCACHE configuration
 ARG USE_SCCACHE
@@ -78,9 +79,16 @@ ARG NIXL_LIBFABRIC_REF
 ARG ARCH=amd64
 ARG ARCH_ALT=x86_64
+# Empty fallback for TRTLLM wheel image copy
+FROM alpine:3.20 AS trtllm_wheel_image_empty
+RUN mkdir -p /app/tensorrt_llm
 # Copy artifacts from NGC PyTorch image
 FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base
+# Resolve TRTLLM wheel image (can be a stage name or a registry image)
+FROM ${TRTLLM_WHEEL_IMAGE} AS trtllm_wheel_image
 ##################################
 ########## Base Image ############
 ##################################
@@ -509,13 +517,25 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"
 # Copy pytorch installation from NGC PyTorch
-ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10
+ARG FLASHINFER_PYTHON_VER=0.6.1
-ARG TORCH_TENSORRT_VER=2.9.0a0
+ARG PYTORCH_TRITON_VER=3.5.1+gitbfeb0668.nv25.12
-ARG TORCHVISION_VER=0.24.0a0+094e7af5
+ARG TORCHAO_VER=0.15.0+git01374eb5
+ARG TORCHDATA_VER=0.11.0
+ARG TORCHTITAN_VER=0.2.0
+ARG TORCH_VER=2.10.0a0+b4e4ee81d3.nv25.12
+ARG TORCH_TENSORRT_VER=2.10.0a0
+ARG TORCHVISION_VER=0.25.0a0+ca221243
 ARG JINJA2_VER=3.1.6
 ARG SYMPY_VER=1.14.0
-ARG FLASH_ATTN_VER=2.7.4.post1+25.10
+ARG FLASH_ATTN_VER=2.7.4.post1+25.12
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchao ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchao
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchao-${TORCHAO_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchao-${TORCHAO_VER}.dist-info
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchdata ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchdata
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchdata-${TORCHDATA_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchdata-${TORCHDATA_VER}.dist-info
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchtitan ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchtitan
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchtitan-${TORCHTITAN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchtitan-${TORCHTITAN_VER}.dist-info
+COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
 COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
 COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
 COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
@@ -533,15 +553,16 @@ COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/fl
 COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch_tensorrt ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt
 COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch_tensorrt-${TORCH_TENSORRT_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt-${TORCH_TENSORRT_VER}.dist-info
+RUN uv pip install flashinfer-python==${FLASHINFER_PYTHON_VER}
 # Install TensorRT-LLM and related dependencies
 ARG HAS_TRTLLM_CONTEXT
 ARG TENSORRTLLM_PIP_WHEEL
 ARG TENSORRTLLM_INDEX_URL
 ARG GITHUB_TRTLLM_COMMIT
+# Copy wheel build context (may be empty for download path)
-# Copy only wheel files and commit info from trtllm_wheel stage from build_context
+COPY --from=trtllm_wheel / /trtllm_wheel/
-COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
+COPY --from=trtllm_wheel_image /app/tensorrt_llm /trtllm_wheel_image/
-COPY --from=trtllm_wheel /*.txt /trtllm_wheel/
 RUN uv pip install --no-cache "cuda-python==13.0.2"
@@ -555,39 +576,48 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    rm -f /etc/apt/trusted.gpg.d/cuda*.gpg
 RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
-        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
-        curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \
-        # Modify the script to use virtual environment pip instead of system pip3
-        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
-        bash /tmp/install_tensorrt.sh && \
        # Install from local wheel directory in build context
        WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
        if [ -n "$WHEEL_FILE" ]; then \
-            uv pip install --no-cache "$WHEEL_FILE" triton==3.5.0; \
+            uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
+    elif [ -n "$(find /trtllm_wheel_image -name "*.whl" | head -n 1)" ]; then \
+        # Install from wheel embedded in the TRTLLM release image
+        WHEEL_FILE="$(find /trtllm_wheel_image -name "*.whl" | head -n 1)"; \
+        uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
    else \
-        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
-        TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/.*==([0-9a-zA-Z.+-]+).*/\1/') && \
-        (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
-         curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
-        # Modify the script to use virtual environment pip instead of system pip3
-        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
-        bash /tmp/install_tensorrt.sh && \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        # TRTLLM 1.2.0rc6.post2 has issues installing from pypi with uv, installing from direct wheel link works best
-        # explicitly installing triton 3.5.0 as trtllm only lists triton as dependency on x64_64 for some reason
+        # Explicitly install triton 3.5.1 because trtllm only lists triton as a dependency on x86_64 for some reason
        if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
            TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
            PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
            DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
-            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.0; \
+            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
        else \
-            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.0; \
+            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
        fi; \
-    fi
+    fi && \
+    # Run TensorRT installer that ships with the TRTLLM wheel
+    TRT_INSTALLER="$(python -c "import glob, os, site; paths = []; \
+        paths += site.getsitepackages() if hasattr(site, 'getsitepackages') else []; \
+        user_site = site.getusersitepackages(); \
+        paths.append(user_site) if user_site else None; \
+        installer = ''; \
+        \
+        [installer:=matches[0] for base in paths \
+            for matches in [glob.glob(os.path.join(base, 'tensorrt_llm', '**', 'install_tensorrt.sh'), recursive=True)] \
+            if matches and not installer]; \
+        print(installer)")"; \
+    if [ -z "$TRT_INSTALLER" ]; then \
+        echo "No install_tensorrt.sh found inside tensorrt_llm package."; \
+        exit 1; \
+    fi; \
+    sed -i 's/pip3 install/uv pip install/g' "$TRT_INSTALLER"; \
+    bash "$TRT_INSTALLER"
 ##################################################
 ########## Runtime Image ########################

--- a/container/build.sh
+++ b/container/build.sh
@@ -74,7 +74,7 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
 # Base Images
 TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
-TRTLLM_BASE_IMAGE_TAG=25.10-py3
+TRTLLM_BASE_IMAGE_TAG=25.12-py3
 # Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
 # we need to build the TensorRT-LLM wheel from source.
@@ -104,7 +104,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
 # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
 # Important Note: This commit is not used in our CI pipeline. See the CI
 # variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="50379d028c2689ffb5cefe7797c5afb199e9df93" # 1.2.0rc6.post2
+DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="45d7022cc33903509fd8045bbc577d77dd1d3e2f" # 1.3.0rc1
 TRTLLM_COMMIT=""
 TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
 TRTLLM_GIT_URL=""
@@ -113,8 +113,13 @@ TRTLLM_GIT_URL=""
 DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
 # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc6.post2"
+DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.3.0rc1"
+# TensorRT-LLM wheels published on PyPI may not be ABI compatible with the NGC PyTorch image.
+# For such versions, the wheel is taken from the NGC TensorRT-LLM release image during the Docker build.
+# The versions listed below are not ABI compatible with the NGC PyTorch image.
+TRTLLM_ABI_INCOMPATIBLE_VERSIONS=("1.3.0rc1")
 TENSORRTLLM_PIP_WHEEL=""
+TRTLLM_WHEEL_IMAGE=""
 VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 # FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
@@ -677,6 +682,50 @@ check_wheel_file() {
    return 0
 }
+get_trtllm_version_from_pip_wheel() {
+    local wheel_spec="$1"
+    if [[ "$wheel_spec" =~ == ]]; then
+        local version
+        version=$(echo "$wheel_spec" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p')
+        if _is_semver_ref "$version"; then
+            echo "${version#v}"
+            return 0
+        fi
+    fi
+    echo ""
+    return 0
+}
+trtllm_version_incompatible() {
+    local version="$1"
+    for incompatible_version in "${TRTLLM_ABI_INCOMPATIBLE_VERSIONS[@]}"; do
+        if [[ "$version" == "$incompatible_version" ]]; then
+            return 0
+        fi
+    done
+    return 1
+}
+_is_semver_ref() {
+    local ref="$1"
+    local semver_regex='^v?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)([-+][0-9A-Za-z.-]+|[A-Za-z][0-9A-Za-z.-]+)?$'
+    [[ "$ref" =~ $semver_regex ]]
+}
+get_github_trtllm_ref() {
+    local commit="$1"
+    if _is_semver_ref "$commit"; then
+        if [[ "$commit" =~ ^v ]]; then
+            echo "$commit"
+        else
+            echo "v${commit}"
+        fi
+        return 0
+    fi
+    echo "$commit"
+    return 0
+}
 function determine_user_intention_trtllm() {
    # The tensorrt llm installation flags are not quite mutually exclusive
    # since the user should be able to point at a directory of their choosing
@@ -764,15 +813,22 @@ if [[ $FRAMEWORK == "TRTLLM" ]]; then
    if [[ "$TRTLLM_INTENTION" == "download" ]]; then
        TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL:-$DEFAULT_TENSORRTLLM_INDEX_URL}
        TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL:-$DEFAULT_TENSORRTLLM_PIP_WHEEL}
+        TRTLLM_WHEEL_VERSION=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
+        if trtllm_version_incompatible "${TRTLLM_WHEEL_VERSION}"; then
+            TRTLLM_WHEEL_IMAGE="nvcr.io/nvidia/tensorrt-llm/release:${TRTLLM_WHEEL_VERSION}"
+            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
+            BUILD_ARGS+=" --build-arg TRTLLM_WHEEL_IMAGE=${TRTLLM_WHEEL_IMAGE}"
+            PRINT_TRTLLM_WHEEL_FILE=${TRTLLM_WHEEL_IMAGE}
+        else
            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
            BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL}"
            BUILD_ARGS+=" --build-arg TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL}"
+            PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
+        fi
        # Create a dummy directory to satisfy the build context requirement
        # There is no way to conditionally copy the build context in dockerfile.
-        mkdir -p /tmp/dummy_dir
+        mkdir -p /tmp/trtllm_wheel_context
-        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/dummy_dir"
+        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/trtllm_wheel_context"
-        PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
    elif [[ "$TRTLLM_INTENTION" == "install" ]]; then
        echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
        if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}"; then
@@ -811,7 +867,11 @@ if [[ $FRAMEWORK == "TRTLLM" ]]; then
    if [[ -z "$TRTLLM_COMMIT" ]]; then
        # Attempt to default since the commit will work with a hash or a tag/branch
        if [[ ! -z "$TENSORRTLLM_PIP_WHEEL" ]]; then
-            TRTLLM_COMMIT=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p')
+            TRTLLM_COMMIT=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
+            if [[ -z "$TRTLLM_COMMIT" ]]; then
+                echo -e "[ERROR] Could not parse a semver version from TENSORRTLLM_PIP_WHEEL: ${TENSORRTLLM_PIP_WHEEL}"
+                exit 1
+            fi
            echo "Attempting to default TRTLLM_COMMIT to \"$TRTLLM_COMMIT\" for installation of TensorRT."
        else
            echo -e "[ERROR] TRTLLM framework was set as a target but the TRTLLM_COMMIT variable was not set."
@@ -820,7 +880,8 @@ if [[ $FRAMEWORK == "TRTLLM" ]]; then
            exit 1
        fi
    fi
-    BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${TRTLLM_COMMIT}"
+    GITHUB_TRTLLM_REF=$(get_github_trtllm_ref "${TRTLLM_COMMIT}")
+    BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${GITHUB_TRTLLM_REF}"
 fi

--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -53,7 +53,7 @@ tensorboard>=2.19.0,<2.21.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds
 # - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
-# - TensorRT-LLM 1.2.0rc6.post2: ==4.57.1
+# - TensorRT-LLM 1.3.0rc1: ==4.57.1
 # - SGLang 0.5.8: ==4.57.1
 # Using >=4.56.0 to satisfy all frameworks
 transformers>=4.56.0

--- a/docs/reference/support-matrix.md
+++ b/docs/reference/support-matrix.md
@@ -18,7 +18,7 @@ The following table shows the backend framework versions included with each Dyna
 | :------------- | :------------- | :--------------- | :------------------ | :--------- | :--------- | :--------------- | :--------- |
 | vLLM           | `0.14.1`       | `0.12.0`         | `0.12.0`            | `0.12.0`   | `0.11.0`   | `0.11.0`         | `0.11.0`   |
 | SGLang         | `0.5.8`        | `0.5.6.post2`    | `0.5.6.post2`       | `0.5.6.post2` | `0.5.3.post4` | `0.5.3.post4` | `0.5.3.post4` |
-| TensorRT-LLM   | `1.2.0rc6.post2` | `1.2.0rc6.post2` | `1.2.0rc6.post1`  | `1.2.0rc6.post1` | `1.2.0rc3` | `1.2.0rc3`     | `1.2.0rc2` |
+| TensorRT-LLM   | `1.3.0rc1`     | `1.2.0rc6.post2` | `1.2.0rc6.post1`  | `1.2.0rc6.post1` | `1.2.0rc3` | `1.2.0rc3`     | `1.2.0rc2` |
 | NIXL           | `0.9.0`        | `0.8.0`          | `0.8.0`             | `0.8.0`    | `0.8.0`    | `0.8.0`          | `0.8.0`    |
 **main (ToT)** reflects the current development branch. **v0.8.1.post1** is a patch release for PyPI wheels and TRT-LLM container only (no GitHub release).

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
 [project.optional-dependencies]
 trtllm =[
    "uvloop",
-    "tensorrt-llm==1.2.0rc6.post2",
+    "tensorrt-llm==1.3.0rc1",
 ]
 vllm = [

--- a/tests/router/common.py
+++ b/tests/router/common.py
@@ -1835,7 +1835,7 @@ def _test_router_decisions_disagg(
                        verify_response_timing(timing_info)
                    # Small delay between requests
-                    await asyncio.sleep(0.5)
+                    await asyncio.sleep(1)
            return prefill_worker_ids, decode_worker_ids