chore: Rename tensorrtllm framework to trtllm (#2435)

a3d624a7 · Tanmay Verma · GitHub · 81c27803 · a3d624a7 · a3d624a7
Unverified Commit a3d624a7 authored Aug 14, 2025 by Tanmay Verma Committed by GitHub Aug 14, 2025
17 changed files
--- a/.github/workflows/trigger_ci.yml
+++ b/.github/workflows/trigger_ci.yml
@@ -105,7 +105,7 @@ jobs:
        fi

        if [ "${{ steps.src_changes.outputs.trtllm }}" == "true" ]; then
-          ci_variables["RUN_TENSORRTLLM"]="true"
+          ci_variables["RUN_TRTLLM"]="true"
        fi

        if [ "${{ steps.src_changes.outputs.sglang }}" == "true" ]; then

--- a/components/backends/trtllm/README.md
+++ b/components/backends/trtllm/README.md
@@ -85,21 +85,21 @@ docker compose -f deploy/docker-compose.yml up -d
 apt-get update && apt-get -y install git git-lfs

 # On an x86 machine:
-./container/build.sh --framework tensorrtllm
+./container/build.sh --framework trtllm

 # On an ARM machine:
-./container/build.sh --framework tensorrtllm --platform linux/arm64
+./container/build.sh --framework trtllm --platform linux/arm64

 # Build the container with the default experimental TensorRT-LLM commit
 # WARNING: This is for experimental feature testing only.
 # The container should not be used in a production environment.
-./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit
+./container/build.sh --framework trtllm --use-default-experimental-tensorrtllm-commit
 ```

 ### Run container

 ```bash
-./container/run.sh --framework tensorrtllm -it
+./container/run.sh --framework trtllm -it
 ```

 ## Single Node Examples
@@ -171,7 +171,7 @@ export MODEL_PATH="nvidia/DeepSeek-R1-FP4"
 Notes:
 - MTP is only available within the container built with the experimental TensorRT-LLM commit. Please add --use-default-experimental-tensorrtllm-commit to the arguments of the build.sh script.

-  Example: `./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit`
+  Example: `./container/build.sh --framework trtllm --use-default-experimental-tensorrtllm-commit`

 - There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
 - MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
@@ -189,7 +189,7 @@ For comprehensive instructions on multinode serving, see the [multinode-examples

 ### Kubernetes Deployment

-For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](deploy/README.md)
+For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](deploy/README.md).

 ### Client

@@ -333,7 +333,7 @@ This is an experimental feature that requires using a specific TensorRT-LLM comm
 To enable it build the dynamo container with the `--tensorrtllm-commit` flag, followed by the commit hash:

 ```bash
-./container/build.sh --framework tensorrtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
+./container/build.sh --framework trtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
 ```

 #### How to Use

--- a/components/backends/trtllm/gemma3_sliding_window_attention.md
+++ b/components/backends/trtllm/gemma3_sliding_window_attention.md
@@ -23,7 +23,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
 ## Notes
 * To run Gemma 3 with VSWA and KV Routing with KV block reuse, ensure that the container is built using commit ID `c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78` from Tensorrt-LLM.
 ```bash
-./container/build.sh --framework TENSORRTLLM --tensorrtllm-commit c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78
+./container/build.sh --framework trtllm --tensorrtllm-commit c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78
 ```
 * The 1.0.0rc4 release version of TensorRT-LLM can also run Gemma 3 with VSWA, but KV block reuse cannot be turned on in that version.


--- a/components/backends/trtllm/gpt-oss.md
+++ b/components/backends/trtllm/gpt-oss.md
@@ -55,7 +55,7 @@ cd $DYNAMO_ROOT
 export DYNAMO_CONTAINER_IMAGE=dynamo-gpt-oss-arm64

 # Build the container with a specific TensorRT-LLM commit
-docker build --platform linux/arm64 -f container/Dockerfile.tensorrt_llm_prebuilt . \
+docker build --platform linux/arm64 -f container/Dockerfile.trtllm_prebuilt . \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/tensorrt-llm/release \
  --build-arg BASE_IMAGE_TAG=gpt-oss-dev \
  --build-arg ARCH=arm64 \
@@ -70,7 +70,7 @@ cd $DYNAMO_ROOT

 export DYNAMO_CONTAINER_IMAGE=dynamo-gpt-oss-amd64

-docker build -f container/Dockerfile.tensorrt_llm_prebuilt . \
+docker build -f container/Dockerfile.trtllm_prebuilt . \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/tensorrt-llm/release \
  --build-arg BASE_IMAGE_TAG=gpt-oss-dev \
  -t $DYNAMO_CONTAINER_IMAGE

--- a/components/backends/trtllm/kv-cache-tranfer.md
+++ b/components/backends/trtllm/kv-cache-tranfer.md
@@ -45,7 +45,7 @@ To enable NIXL for KV cache transfer in disaggregated serving:

   **Build the container with NIXL support:**
   ```bash
-   ./container/build.sh --framework tensorrtllm \
+   ./container/build.sh --framework trtllm \
     --use-default-experimental-tensorrtllm-commit \
     --trtllm-use-nixl-kvcache-experimental
   ```

--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.trtllm
--- a/container/Dockerfile.tensorrt_llm_prebuilt
+++ b/container/Dockerfile.trtllm_prebuilt
--- a/container/build.sh
+++ b/container/build.sh
@@ -49,7 +49,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
 # dependencies are specified in the /container/deps folder and
 # installed within framework specific sections of the Dockerfile.

-declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
+declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
 DEFAULT_FRAMEWORK=VLLM

 SOURCE_DIR=$(dirname "$(readlink -f "$0")")
@@ -57,8 +57,8 @@ DOCKERFILE=${SOURCE_DIR}/Dockerfile
 BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")

 # Base Images
-TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
-TENSORRTLLM_BASE_IMAGE_TAG=25.05-py3
+TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
+TRTLLM_BASE_IMAGE_TAG=25.05-py3

 # Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
 # we need to build the TensorRT-LLM wheel from source.
@@ -96,7 +96,7 @@ TRTLLM_GIT_URL=""
 # TensorRT-LLM PyPI index URL
 TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
-# Need to update the Dockerfile.tensorrt_llm to use the ai-dynamo[trtllm] package.
+# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
 DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.0.0rc4"
 TENSORRTLLM_PIP_WHEEL=""

@@ -349,7 +349,7 @@ show_image_options() {
    echo ""
    echo "   Base: '${BASE_IMAGE}'"
    echo "   Base_Image_Tag: '${BASE_IMAGE_TAG}'"
-    if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
+    if [[ $FRAMEWORK == "TRTLLM" ]]; then
        echo "   Tensorrtllm_Pip_Wheel: '${TENSORRTLLM_PIP_WHEEL}'"
    fi
    echo "   Build Context: '${BUILD_CONTEXT}'"
@@ -404,8 +404,8 @@ fi
 # Update DOCKERFILE if framework is VLLM
 if [[ $FRAMEWORK == "VLLM" ]]; then
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
-elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
-    DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
+elif [[ $FRAMEWORK == "TRTLLM" ]]; then
+    DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
 elif [[ $FRAMEWORK == "NONE" ]]; then
    DOCKERFILE=${SOURCE_DIR}/Dockerfile
 elif [[ $FRAMEWORK == "SGLANG" ]]; then
@@ -471,7 +471,7 @@ check_wheel_file() {
    fi
 }

-if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
+if [[ $FRAMEWORK == "TRTLLM" ]]; then
    if [ "$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT" = true ]; then
        if [ -n "$TRTLLM_COMMIT" ] || [ -n "$TENSORRTLLM_PIP_WHEEL" ]; then
            echo "ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."

--- a/container/build_trtllm_wheel.sh
+++ b/container/build_trtllm_wheel.sh
@@ -92,7 +92,7 @@ grep "__version__" "$VERSION_FILE"

 echo "Copying install_nixl.sh from $MAIN_DIR to ${PWD}/docker/common/"
 # Copy install_nixl.sh to docker/common/
-cp $MAIN_DIR/deps/tensorrt_llm/install_nixl.sh docker/common/install_nixl.sh
+cp $MAIN_DIR/deps/trtllm/install_nixl.sh docker/common/install_nixl.sh
 # Update NIXL_COMMIT in install_nixl.sh to use the parameter passed to this script
 sed -i "s/NIXL_COMMIT=\"[^\"]*\"/NIXL_COMMIT=\"${NIXL_COMMIT}\"/" docker/common/install_nixl.sh


--- a/container/deps/tensorrt_llm/install_nixl.sh
+++ b/container/deps/trtllm/install_nixl.sh
--- a/container/run.sh
+++ b/container/run.sh
@@ -24,7 +24,7 @@ RUN_PREFIX=
 # dependencies are specified in the /container/deps folder and
 # installed within framework specific sections of the Dockerfile.

-declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
+declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
 DEFAULT_FRAMEWORK=VLLM

 SOURCE_DIR=$(dirname "$(readlink -f "$0")")

--- a/deploy/metrics/README.md
+++ b/deploy/metrics/README.md
@@ -32,7 +32,7 @@ graph TD

 The dcgm-exporter service in the Docker Compose network is configured to use port 9401 instead of the default port 9400. This adjustment is made to avoid port conflicts with other dcgm-exporter instances that may be running simultaneously. Such a configuration is typical in distributed systems like SLURM.

-As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM` or `--framework TENSORRTLLM`.
+As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM` or `--framework TRTLLM`.

 ### Available Metrics

@@ -55,7 +55,7 @@ Some components expose additional metrics specific to their functionality:

 #### Frontend Metrics

-When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TENSORRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name:
+When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name:

 - `dynamo_frontend_inflight_requests`: Inflight requests (gauge)
 - `dynamo_frontend_input_sequence_tokens`: Input sequence length (histogram)

--- a/docs/guides/metrics.md
+++ b/docs/guides/metrics.md
@@ -25,7 +25,7 @@ Dynamo provides built-in metrics capabilities through the `MetricsRegistry` trai

 Dynamo automatically exposes metrics with the `dynamo_` name prefixes. It also adds the following labels `dynamo_namespace`, `dynamo_component`, and `dynamo_endpoint` to indicate which component is providing the metric.

-**Frontend Metrics**: When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TENSORRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name. These cover request handling, token processing, and latency measurements. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of frontend metrics.
+**Frontend Metrics**: When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name. These cover request handling, token processing, and latency measurements. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of frontend metrics.

 **Component Metrics**: The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework. These include request counts, processing times, byte transfers, and system uptime metrics. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of component metrics.


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -180,7 +180,7 @@ markers = [
    "unit: marks tests as unit tests",
    "stress: marks tests as stress tests",
    "vllm: marks tests as requiring vllm",
-    "tensorrtllm: marks tests as requiring tensorrtllm",
+    "trtllm_marker: marks tests as requiring trtllm",
    "sglang: marks tests as requiring sglang",
    "slow: marks tests as known to be slow"
 ]

--- a/tests/README.md
+++ b/tests/README.md
@@ -68,7 +68,7 @@ Markers help control which tests run under different conditions. Add these decor
 ### Component-specific markers
 - `@pytest.mark.vllm` - Framework tests
 - `@pytest.mark.sglang` - Framework tests
- `@pytest.mark.tensorrtllm` - Framework tests
+- `@pytest.mark.trtllm_marker` - Framework tests
 - `@pytest.mark.planner` - Planner component tests
 - `@pytest.mark.kv_router` - KV Router component tests
 - etc.

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -141,14 +141,14 @@ def pytest_collection_modifyitems(config, items):
    It is used to skip tests that are not supported on all environments.
    """

-    # Tests marked with tensorrtllm requires specific environment with tensorrtllm
+    # Tests marked with trtllm_marker require a specific environment with tensorrtllm
    # installed. Hence, we skip them if the user did not explicitly ask for them.
-    if config.getoption("-m") and "tensorrtllm" in config.getoption("-m"):
+    if config.getoption("-m") and "trtllm_marker" in config.getoption("-m"):
        return
-    skip_tensorrtllm = pytest.mark.skip(reason="need -m tensorrtllm to run")
+    skip_trtllm = pytest.mark.skip(reason="need -m trtllm_marker to run")
    for item in items:
-        if "tensorrtllm" in item.keywords:
-            item.add_marker(skip_tensorrtllm)
+        if "trtllm_marker" in item.keywords:
+            item.add_marker(skip_trtllm)

        # Auto-inject predownload_models fixture for serve tests only (not router tests)
        # Skip items that don't have fixturenames (like MypyFileItem)

--- a/tests/serve/test_trtllm.py
+++ b/tests/serve/test_trtllm.py
@@ -185,7 +185,7 @@ trtllm_configs = {
        name="aggregated",
        directory="/workspace/components/backends/trtllm",
        script_name="agg.sh",
-        marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
+        marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
        endpoints=["v1/chat/completions", "v1/completions"],
        response_handlers=[
            chat_completions_response_handler,
@@ -198,7 +198,7 @@ trtllm_configs = {
        name="disaggregated",
        directory="/workspace/components/backends/trtllm",
        script_name="disagg.sh",
-        marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
+        marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
        endpoints=["v1/chat/completions", "v1/completions"],
        response_handlers=[
            chat_completions_response_handler,
@@ -214,7 +214,7 @@ trtllm_configs = {
        name="aggregated_router",
        directory="/workspace/components/backends/trtllm",
        script_name="agg_router.sh",
-        marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
+        marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
        endpoints=["v1/chat/completions", "v1/completions"],
        response_handlers=[
            chat_completions_response_handler,
@@ -227,7 +227,7 @@ trtllm_configs = {
        name="disaggregated_router",
        directory="/workspace/components/backends/trtllm",
        script_name="disagg_router.sh",
-        marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
+        marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
        endpoints=["v1/chat/completions", "v1/completions"],
        response_handlers=[
            chat_completions_response_handler,