Unverified Commit a3d624a7 authored by Tanmay Verma's avatar Tanmay Verma Committed by GitHub
Browse files

chore: Rename tensorrtllm framework to trtllm (#2435)

parent 81c27803
......@@ -105,7 +105,7 @@ jobs:
fi
if [ "${{ steps.src_changes.outputs.trtllm }}" == "true" ]; then
ci_variables["RUN_TENSORRTLLM"]="true"
ci_variables["RUN_TRTLLM"]="true"
fi
if [ "${{ steps.src_changes.outputs.sglang }}" == "true" ]; then
......
......@@ -85,21 +85,21 @@ docker compose -f deploy/docker-compose.yml up -d
apt-get update && apt-get -y install git git-lfs
# On an x86 machine:
./container/build.sh --framework tensorrtllm
./container/build.sh --framework trtllm
# On an ARM machine:
./container/build.sh --framework tensorrtllm --platform linux/arm64
./container/build.sh --framework trtllm --platform linux/arm64
# Build the container with the default experimental TensorRT-LLM commit
# WARNING: This is for experimental feature testing only.
# The container should not be used in a production environment.
./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit
./container/build.sh --framework trtllm --use-default-experimental-tensorrtllm-commit
```
### Run container
```bash
./container/run.sh --framework tensorrtllm -it
./container/run.sh --framework trtllm -it
```
## Single Node Examples
......@@ -171,7 +171,7 @@ export MODEL_PATH="nvidia/DeepSeek-R1-FP4"
Notes:
- MTP is only available within the container built with the experimental TensorRT-LLM commit. Please add `--use-default-experimental-tensorrtllm-commit` to the arguments of the `build.sh` script.
Example: `./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit`
Example: `./container/build.sh --framework trtllm --use-default-experimental-tensorrtllm-commit`
- There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
- MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
......@@ -189,7 +189,7 @@ For comprehensive instructions on multinode serving, see the [multinode-examples
### Kubernetes Deployment
For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](deploy/README.md)
For complete Kubernetes deployment instructions, configurations, and troubleshooting, see [TensorRT-LLM Kubernetes Deployment Guide](deploy/README.md).
### Client
......@@ -333,7 +333,7 @@ This is an experimental feature that requires using a specific TensorRT-LLM comm
To enable it, build the dynamo container with the `--tensorrtllm-commit` flag, followed by the commit hash:
```bash
./container/build.sh --framework tensorrtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
./container/build.sh --framework trtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
```
#### How to Use
......
......@@ -23,7 +23,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
## Notes
* To run Gemma 3 with VSWA and KV Routing with KV block reuse, ensure that the container is built using commit ID `c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78` from TensorRT-LLM.
```bash
./container/build.sh --framework TENSORRTLLM --tensorrtllm-commit c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78
./container/build.sh --framework trtllm --tensorrtllm-commit c9eebcb4541d961ab390f0bd0a22e2c89f1bcc78
```
* The 1.0.0rc4 release version of TensorRT-LLM can also run Gemma 3 with VSWA, but KV block reuse cannot be turned on in that version.
......
......@@ -55,7 +55,7 @@ cd $DYNAMO_ROOT
export DYNAMO_CONTAINER_IMAGE=dynamo-gpt-oss-arm64
# Build the container with a specific TensorRT-LLM commit
docker build --platform linux/arm64 -f container/Dockerfile.tensorrt_llm_prebuilt . \
docker build --platform linux/arm64 -f container/Dockerfile.trtllm_prebuilt . \
--build-arg BASE_IMAGE=nvcr.io/nvidia/tensorrt-llm/release \
--build-arg BASE_IMAGE_TAG=gpt-oss-dev \
--build-arg ARCH=arm64 \
......@@ -70,7 +70,7 @@ cd $DYNAMO_ROOT
export DYNAMO_CONTAINER_IMAGE=dynamo-gpt-oss-amd64
docker build -f container/Dockerfile.tensorrt_llm_prebuilt . \
docker build -f container/Dockerfile.trtllm_prebuilt . \
--build-arg BASE_IMAGE=nvcr.io/nvidia/tensorrt-llm/release \
--build-arg BASE_IMAGE_TAG=gpt-oss-dev \
-t $DYNAMO_CONTAINER_IMAGE
......
......@@ -45,7 +45,7 @@ To enable NIXL for KV cache transfer in disaggregated serving:
**Build the container with NIXL support:**
```bash
./container/build.sh --framework tensorrtllm \
./container/build.sh --framework trtllm \
--use-default-experimental-tensorrtllm-commit \
--trtllm-use-nixl-kvcache-experimental
```
......
......@@ -49,7 +49,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
......@@ -57,8 +57,8 @@ DOCKERFILE=${SOURCE_DIR}/Dockerfile
BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
# Base Images
TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
TENSORRTLLM_BASE_IMAGE_TAG=25.05-py3
TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
TRTLLM_BASE_IMAGE_TAG=25.05-py3
# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
# we need to build the TensorRT-LLM wheel from source.
......@@ -96,7 +96,7 @@ TRTLLM_GIT_URL=""
# TensorRT-LLM PyPI index URL
TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.tensorrt_llm to use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.0.0rc4"
TENSORRTLLM_PIP_WHEEL=""
......@@ -349,7 +349,7 @@ show_image_options() {
echo ""
echo " Base: '${BASE_IMAGE}'"
echo " Base_Image_Tag: '${BASE_IMAGE_TAG}'"
if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
if [[ $FRAMEWORK == "TRTLLM" ]]; then
echo " Tensorrtllm_Pip_Wheel: '${TENSORRTLLM_PIP_WHEEL}'"
fi
echo " Build Context: '${BUILD_CONTEXT}'"
......@@ -404,8 +404,8 @@ fi
# Select the DOCKERFILE that matches the chosen framework
if [[ $FRAMEWORK == "VLLM" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
elif [[ $FRAMEWORK == "TRTLLM" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
elif [[ $FRAMEWORK == "NONE" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile
elif [[ $FRAMEWORK == "SGLANG" ]]; then
......@@ -471,7 +471,7 @@ check_wheel_file() {
fi
}
if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
if [[ $FRAMEWORK == "TRTLLM" ]]; then
if [ "$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT" = true ]; then
if [ -n "$TRTLLM_COMMIT" ] || [ -n "$TENSORRTLLM_PIP_WHEEL" ]; then
echo "ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
......
......@@ -92,7 +92,7 @@ grep "__version__" "$VERSION_FILE"
echo "Copying install_nixl.sh from $MAIN_DIR to ${PWD}/docker/common/"
# Copy install_nixl.sh to docker/common/
cp $MAIN_DIR/deps/tensorrt_llm/install_nixl.sh docker/common/install_nixl.sh
cp $MAIN_DIR/deps/trtllm/install_nixl.sh docker/common/install_nixl.sh
# Update NIXL_COMMIT in install_nixl.sh to use the parameter passed to this script
sed -i "s/NIXL_COMMIT=\"[^\"]*\"/NIXL_COMMIT=\"${NIXL_COMMIT}\"/" docker/common/install_nixl.sh
......
......@@ -24,7 +24,7 @@ RUN_PREFIX=
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
......
......@@ -32,7 +32,7 @@ graph TD
The dcgm-exporter service in the Docker Compose network is configured to use port 9401 instead of the default port 9400. This adjustment is made to avoid port conflicts with other dcgm-exporter instances that may be running simultaneously. Such a configuration is typical in distributed systems like SLURM.
As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM` or `--framework TENSORRTLLM`.
As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM` or `--framework TRTLLM`.
### Available Metrics
......@@ -55,7 +55,7 @@ Some components expose additional metrics specific to their functionality:
#### Frontend Metrics
When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TENSORRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name:
When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name:
- `dynamo_frontend_inflight_requests`: Inflight requests (gauge)
- `dynamo_frontend_input_sequence_tokens`: Input sequence length (histogram)
......
......@@ -25,7 +25,7 @@ Dynamo provides built-in metrics capabilities through the `MetricsRegistry` trai
Dynamo automatically exposes metrics with the `dynamo_` name prefix. It also adds the labels `dynamo_namespace`, `dynamo_component`, and `dynamo_endpoint` to indicate which component is providing the metric.
**Frontend Metrics**: When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TENSORRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name. These cover request handling, token processing, and latency measurements. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of frontend metrics.
**Frontend Metrics**: When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name. These cover request handling, token processing, and latency measurements. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of frontend metrics.
**Component Metrics**: The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework. These include request counts, processing times, byte transfers, and system uptime metrics. See the [Available Metrics section](../../deploy/metrics/README.md#available-metrics) for the complete list of component metrics.
......
......@@ -180,7 +180,7 @@ markers = [
"unit: marks tests as unit tests",
"stress: marks tests as stress tests",
"vllm: marks tests as requiring vllm",
"tensorrtllm: marks tests as requiring tensorrtllm",
"trtllm_marker: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang",
"slow: marks tests as known to be slow"
]
......
......@@ -68,7 +68,7 @@ Markers help control which tests run under different conditions. Add these decor
### Component-specific markers
- `@pytest.mark.vllm` - Framework tests
- `@pytest.mark.sglang` - Framework tests
- `@pytest.mark.tensorrtllm` - Framework tests
- `@pytest.mark.trtllm_marker` - Framework tests
- `@pytest.mark.planner` - Planner component tests
- `@pytest.mark.kv_router` - KV Router component tests
- etc.
......
......@@ -141,14 +141,14 @@ def pytest_collection_modifyitems(config, items):
It is used to skip tests that are not supported on all environments.
"""
# Tests marked with tensorrtllm requires specific environment with tensorrtllm
# Tests marked with trtllm_marker require a specific environment with tensorrtllm
# installed. Hence, we skip them if the user did not explicitly ask for them.
if config.getoption("-m") and "tensorrtllm" in config.getoption("-m"):
if config.getoption("-m") and "trtllm_marker" in config.getoption("-m"):
return
skip_tensorrtllm = pytest.mark.skip(reason="need -m tensorrtllm to run")
skip_trtllm = pytest.mark.skip(reason="need -m trtllm_marker to run")
for item in items:
if "tensorrtllm" in item.keywords:
item.add_marker(skip_tensorrtllm)
if "trtllm_marker" in item.keywords:
item.add_marker(skip_trtllm)
# Auto-inject predownload_models fixture for serve tests only (not router tests)
# Skip items that don't have fixturenames (like MypyFileItem)
......
......@@ -185,7 +185,7 @@ trtllm_configs = {
name="aggregated",
directory="/workspace/components/backends/trtllm",
script_name="agg.sh",
marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
endpoints=["v1/chat/completions", "v1/completions"],
response_handlers=[
chat_completions_response_handler,
......@@ -198,7 +198,7 @@ trtllm_configs = {
name="disaggregated",
directory="/workspace/components/backends/trtllm",
script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
endpoints=["v1/chat/completions", "v1/completions"],
response_handlers=[
chat_completions_response_handler,
......@@ -214,7 +214,7 @@ trtllm_configs = {
name="aggregated_router",
directory="/workspace/components/backends/trtllm",
script_name="agg_router.sh",
marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
endpoints=["v1/chat/completions", "v1/completions"],
response_handlers=[
chat_completions_response_handler,
......@@ -227,7 +227,7 @@ trtllm_configs = {
name="disaggregated_router",
directory="/workspace/components/backends/trtllm",
script_name="disagg_router.sh",
marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
endpoints=["v1/chat/completions", "v1/completions"],
response_handlers=[
chat_completions_response_handler,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment