feat: triton worker example (#4971)

Signed-off-by: Neal Vaidya <nealv@nvidia.com>

feat: triton worker example (#4971)
Signed-off-by: Neal Vaidya <nealv@nvidia.com>
971c3069 · Neal Vaidya · GitHub · f9918f61 · 971c3069 · 971c3069
Unverified Commit 971c3069 authored Jan 16, 2026 by Neal Vaidya Committed by GitHub Jan 16, 2026
8 changed files
--- a/examples/backends/tritonserver/Dockerfile
+++ b/examples/backends/tritonserver/Dockerfile
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Triton worker image: layers the Triton Server runtime taken from a Triton
+# image on top of a Dynamo base image. Both image references are build
+# arguments, so they can be replaced at build time.
+ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
+ARG TRITON_SERVER_IMAGE="nvcr.io/nvidia/tritonserver:25.01-py3"
+# Stage used only as a source of files for the COPY --from lines below.
+FROM ${TRITON_SERVER_IMAGE} AS triton_source
+FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
+# Bring over the Triton installation and the DCGM files it is shipped with.
+COPY --from=triton_source /opt/tritonserver /opt/tritonserver
+COPY --from=triton_source /usr/local/dcgm /usr/local/dcgm
+# NOTE(review): this path is specific to x86_64 images - confirm before building for other architectures.
+COPY --from=triton_source /lib/x86_64-linux-gnu/libdcgm*.so* /lib/x86_64-linux-gnu/
+# Make the copied Triton libraries, backends and binaries discoverable.
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/opt/tritonserver/lib:/opt/tritonserver/backends:/usr/local/dcgm/lib64
+ENV PATH=/opt/tritonserver/bin:$PATH
+# Example sources, sample model repository and launch script, owned by the dynamo user.
+# No backends/ directory is copied; Triton backends live under /opt/tritonserver/backends in this image.
+COPY --chown=dynamo: src/ /workspace/src/
+COPY --chown=dynamo: model_repo/ /workspace/model_repo/
+COPY --chown=dynamo: launch/ /workspace/launch/
+WORKDIR /workspace
+USER dynamo
+# Python dependencies: the gRPC client package and the Triton wheel(s) shipped inside /opt/tritonserver.
+RUN uv pip install --no-cache-dir tritonclient[grpc]
+RUN uv pip install /opt/tritonserver/python/triton*.whl
--- a/examples/backends/tritonserver/Makefile
+++ b/examples/backends/tritonserver/Makefile
+# Makefile for Triton Server installation
+# Builds Triton Server and copies artifacts only when necessary
+# Directory containing this Makefile (resolved through realpath), so the
+# install locations below do not depend on the caller's working directory.
+CURRENT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+# Location of the Triton Server checkout and of the install tree the build rule expects inside it.
+TMP_SERVER := /tmp/server
+BUILD_INSTALL := $(TMP_SERVER)/build/install
+# Target directories
+WHEELHOUSE_DIR := $(CURRENT_DIR)/wheelhouse
+LIB_DIR := $(CURRENT_DIR)/lib
+BIN_DIR := $(CURRENT_DIR)/bin
+BACKEND_DIR := $(CURRENT_DIR)/backends
+# Target files
+TRITON_LIB := $(LIB_DIR)/libtritonserver.so
+TRITON_BIN := $(BIN_DIR)/tritonserver
+# Marker file touched after the wheels are copied; stands in for the wheel
+# files, which are copied with a glob rather than named individually.
+TRITON_WHEEL := $(WHEELHOUSE_DIR)/.wheel_marker
+# Default goal: install the library, binary, wheels and backends, then print
+# the environment settings needed to use them.
+.PHONY: all
+all: $(TRITON_LIB) $(TRITON_BIN) $(TRITON_WHEEL) $(BACKEND_DIR)
+	@printf '%s\n' \
+	  "Triton Server installation complete." \
+	  "Add to your environment:" \
+	  "  export LD_LIBRARY_PATH=$(LIB_DIR):$(BACKEND_DIR):\$$LD_LIBRARY_PATH" \
+	  "  export PATH=$(BIN_DIR):\$$PATH"
+# Clone the repository
+# The checkout is written to the rule's own target ($@) so that the recipe
+# always creates exactly the directory this rule promises, including when
+# TMP_SERVER is overridden on the command line.
+$(TMP_SERVER):
+	@echo "Cloning Triton Server repository..."
+	git clone https://github.com/triton-inference-server/server.git $@
+# Build Triton Server
+# One build.py run produces the library, the server binary and the backends
+# directory. Only the library rule carries the build recipe, so the build is
+# never started twice (a "a b: prereq" header defines two independent rules,
+# which can both fire under `make -j`).
+# $(TMP_SERVER) is an order-only prerequisite: the checkout must exist, but
+# changes to the directory's timestamp must not trigger a rebuild.
+$(BUILD_INSTALL)/lib/libtritonserver.so: | $(TMP_SERVER)
+	@echo "Building Triton Server (this may take a while)..."
+	cd $(TMP_SERVER) && \
+	uv venv .venv && \
+	. .venv/bin/activate && \
+	uv pip install distro requests && \
+	python3 build.py \
+	  --enable-logging \
+	  --enable-stats \
+	  --enable-metrics \
+	  --endpoint=http \
+	  --backend=identity
+# By-products of the build above. They depend on the library so that they are
+# only looked at after the build has run; the recipe just verifies that the
+# build really produced them instead of building anything itself.
+$(BUILD_INSTALL)/bin/tritonserver $(BUILD_INSTALL)/backends: $(BUILD_INSTALL)/lib/libtritonserver.so
+	@test -e $@ || { echo "Error: $@ was not produced by the build; run 'make distclean' and rebuild." >&2; exit 1; }
+# Copy library
+# Installs the built libtritonserver.so into $(LIB_DIR), creating the
+# directory on demand. The progress message names the file actually copied.
+$(TRITON_LIB): $(BUILD_INSTALL)/lib/libtritonserver.so
+	@echo "Copying libtritonserver.so..."
+	@mkdir -p $(@D)
+	cp $< $@
+# Install the built tritonserver executable into $(BIN_DIR), creating the
+# directory first when it does not exist yet.
+$(TRITON_BIN): $(BUILD_INSTALL)/bin/tritonserver
+	@echo "Copying tritonserver binary..."
+	@mkdir -p $(@D)
+	cp $< $@
+# Copy backends
+# The target is a directory, and overwriting entries that already exist does
+# not change a directory's timestamp. The final touch marks the directory as
+# freshly updated so this rule is not re-run on every invocation after a
+# rebuild of the backends.
+$(BACKEND_DIR): $(BUILD_INSTALL)/backends
+	@echo "Copying backends..."
+	@mkdir -p $@
+	cp -r $</* $@/
+	@touch $@
+# Install the Python wheels from the build tree into the wheelhouse. The
+# target is a marker file inside the wheelhouse that is touched once the
+# copy has succeeded; the built library is used as the freshness reference.
+$(TRITON_WHEEL): $(BUILD_INSTALL)/lib/libtritonserver.so
+	@echo "Copying Python wheels..."
+	@mkdir -p $(@D)
+	cp $(BUILD_INSTALL)/python/*.whl $(@D)/
+	@touch $@
+	@echo "Triton Server wheel built successfully."
+	@ls -al $(@D)
+# Clean installed artifacts (keeps the build)
+# Removes every directory that `all` installs into - wheels, library, binary
+# and backends - while leaving the checkout and build tree in $(TMP_SERVER).
+.PHONY: clean
+clean:
+	@echo "Cleaning installed artifacts..."
+	rm -rf $(WHEELHOUSE_DIR) $(LIB_DIR) $(BIN_DIR) $(BACKEND_DIR)
+# Everything `clean` removes, plus the cloned repository together with the
+# build tree that lives inside it.
+.PHONY: distclean
+distclean: clean
+	@echo "Cleaning build directory..."
+	rm -r -f $(TMP_SERVER)
+# Show what would be built
+# Reports, for the checkout, the build output and each artifact installed by
+# `all` (library, binary, wheels, backends), whether it currently exists.
+.PHONY: status
+status:
+	@echo "Installation status:"
+	@echo "  Repository: $(if $(wildcard $(TMP_SERVER)),✓ cloned,✗ not cloned)"
+	@echo "  Built: $(if $(wildcard $(BUILD_INSTALL)/lib/libtritonserver.so),✓ yes,✗ no)"
+	@echo "  Library: $(if $(wildcard $(TRITON_LIB)),✓ installed,✗ not installed)"
+	@echo "  Binary: $(if $(wildcard $(TRITON_BIN)),✓ installed,✗ not installed)"
+	@echo "  Wheels: $(if $(wildcard $(TRITON_WHEEL)),✓ installed,✗ not installed)"
+	@echo "  Backends: $(if $(wildcard $(BACKEND_DIR)),✓ installed,✗ not installed)"
+# Print a short description of the user-facing targets.
+.PHONY: help
+help:
+	@printf '%s\n' \
+	  "Triton Server Installation Makefile" \
+	  "" \
+	  "Targets:" \
+	  "  all        - Build and install Triton Server (default)" \
+	  "  clean      - Remove installed artifacts (keeps build cache)" \
+	  "  distclean  - Remove everything including build cache" \
+	  "  status     - Show installation status" \
+	  "  help       - Show this help message"
--- a/examples/backends/tritonserver/README.md
+++ b/examples/backends/tritonserver/README.md
+# Triton Server Backend for Dynamo
+> **⚠️ Work in Progress / Proof of Concept**
+>
+> This example demonstrates integrating NVIDIA Triton Inference Server as a backend for Dynamo.
+> It is currently a proof-of-concept and may require additional work for production use.
+## Overview
+This example shows how to run Triton Server models through Dynamo's distributed runtime, exposing them via the KServe gRPC protocol. The integration allows Triton models to benefit from Dynamo's service discovery, routing, and infrastructure.
+**Architecture:**
+```
+┌─────────────────┐     ┌─────────────────┐     ┌─────────────────────────────┐
+│  Triton Client  │────▶│  Dynamo Frontend│────▶│       Dynamo Worker         │
+│  (KServe gRPC)  │     │  (port 8787)    │     │  ┌───────────────────────┐  │
+└─────────────────┘     └─────────────────┘     │  │    Triton Server      │  │
+                              │                 │  │  (Python bindings)    │  │
+                              ▼                 │  └───────────────────────┘  │
+                    ┌─────────────────┐         └─────────────────────────────┘
+                    │    KV Store     │
+                    └─────────────────┘
+```
+## Prerequisites
+- NVIDIA GPU with CUDA support
+- For local development: Python 3.10+ with Dynamo installed
+- For container deployment: Docker with NVIDIA Container Toolkit
+## Quick Start
+### Option 1: Container Deployment
+#### Step 1: Build Container Images
+From the Dynamo repository root:
+```bash
+# Build the base Dynamo image
+./container/build.sh
+# Build the Triton worker image
+cd examples/backends/tritonserver
+docker build -t dynamo-triton:latest .
+```
+#### Step 2: Run the Container
+```bash
+docker run --rm -it --gpus all --network host \
+  dynamo-triton:latest \
+  ./launch/identity.sh --backend-directory /opt/tritonserver/backends
+```
+#### Step 3: Test the Deployment
+In another terminal:
+```bash
+# Install client dependencies
+pip install tritonclient[grpc]
+# Test with the client
+cd examples/backends/tritonserver
+python src/client.py --port 8787
+```
+### Option 2: Local Development
+This requires Dynamo to be installed locally.
+```bash
+# From the dynamo repo root
+cd examples/backends/tritonserver
+# Build Triton Server (first time only, ~30 minutes)
+make all
+# Install Python dependencies
+pip install wheelhouse/tritonserver-*.whl
+pip install tritonclient[grpc]
+# Launch the server
+./launch/identity.sh
+# In another terminal, test with the client
+python src/client.py
+```
+## Directory Structure
+```
+tritonserver/
+├── launch/
+│   └── identity.sh      # Launch script (frontend + worker)
+├── src/
+│   ├── tritonworker.py  # Main Dynamo worker implementation
+│   └── client.py        # Test client (KServe gRPC)
+├── model_repo/
+│   └── identity/        # Sample identity model
+│       ├── config.pbtxt
+│       └── 1/
+├── backends/            # Triton backends (built by `make all`)
+├── bin/                 # Triton server binary (built by `make all`)
+├── lib/                 # Triton libraries (built by `make all`)
+├── wheelhouse/          # Python wheels (built by `make all`)
+├── Dockerfile           # Triton worker container
+└── Makefile             # Build Triton from source
+```
+## Configuration
+### Launch Script Options
+```bash
+./launch/identity.sh --help
+Options:
+  --model-name <name>         Model name to load (default: identity)
+  --model-repository <path>   Path to model repository
+  --backend-directory <path>  Path to Triton backends
+  --log-verbose <level>       Triton log verbosity 0-6 (default: 1)
+  --store-kv <backend>        KV store backend: file, etcd, mem (default: file)
+```
+### Environment Variables
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `DYN_STORE_KV` | KV store backend: `file`, `etcd`, or `mem` | `file` |
+| `DYN_LOG` | Log level (debug, info, warn, error) | `info` |
+| `DYN_HTTP_PORT` | Frontend HTTP port | `8000` |
+| `ETCD_ENDPOINTS` | etcd connection URL (only when `--store-kv etcd`) | `http://localhost:2379` |
+| `NATS_SERVER` | NATS connection URL (only for distributed mode) | `nats://localhost:4222` |
+## Adding Your Own Models
+1. Create a model directory in `model_repo/`:
+   ```text
+   model_repo/
+   └── my_model/
+       ├── config.pbtxt
+       └── 1/
+           └── model.plan  # or other model file
+   ```
+2. Define the model config (`config.pbtxt`):
+   ```protobuf
+   name: "my_model"
+   backend: "tensorrt"  # or onnxruntime, python, etc.
+   max_batch_size: 8
+   input [
+     {
+       name: "input"
+       data_type: TYPE_FP32
+       dims: [3, 224, 224]
+     }
+   ]
+   output [
+     {
+       name: "output"
+       data_type: TYPE_FP32
+       dims: [1000]
+     }
+   ]
+   ```
+3. Launch with your model:
+   ```bash
+   ./launch/identity.sh --model-name my_model
+   ```
+## Known Limitations
+- **Single model**: Currently loads one model at a time
+- **Identity backend only**: The Makefile builds the identity backend by default; other backends require modifying the build configuration
+## Building Triton from Source
+Required for local development. The Makefile builds Triton Server and the identity backend.
+```bash
+cd examples/backends/tritonserver
+# Build Triton Server (~30 minutes, clones and builds from source)
+make all
+# Check build status
+make status
+# This produces:
+#   lib/libtritonserver.so     - Core library
+#   bin/tritonserver           - Server binary
+#   backends/identity/         - Identity backend
+#   wheelhouse/*.whl           - Python bindings
+# Clean up build artifacts
+make clean      # Remove installed artifacts
+make distclean  # Remove everything including build cache
+```
+To add other backends (TensorRT, ONNX, Python, etc.), edit the Makefile's `build.py` invocation to include additional `--backend=<name>` flags.
+## Troubleshooting
+### "Model not found" error
+- Verify the model exists in `model_repo/<model_name>/`
+- Check that `config.pbtxt` is valid
+- Ensure the backend is available in `backends/`
+### Worker fails to start
+- Check `LD_LIBRARY_PATH` includes Triton libraries
+- Verify GPU is available: `nvidia-smi`
+- Increase log verbosity: `--log-verbose 6`
+## Related Documentation
+- [Dynamo Backend Guide](../../../docs/development/backend-guide.md)
+- [Triton Inference Server](https://github.com/triton-inference-server/server)
+- [KServe Protocol](https://kserve.github.io/website/latest/modelserving/data_plane/v2_protocol/)
--- a/examples/backends/tritonserver/launch/identity.sh
+++ b/examples/backends/tritonserver/launch/identity.sh
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Launch script for Triton Server backend with Dynamo
+# This runs the frontend and triton worker on the same node
+set -e
+# Setup cleanup trap
+cleanup() {
+    echo "Cleaning up background processes..."
+    kill $FRONTEND_PID 2>/dev/null || true
+    wait $FRONTEND_PID 2>/dev/null || true
+    echo "Cleanup complete."
+}
+trap cleanup EXIT INT TERM
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TRITON_DIR="$(dirname "$SCRIPT_DIR")"
+# Default values
+MODEL_NAME="identity"
+MODEL_REPO="${TRITON_DIR}/model_repo"
+BACKEND_DIR="${TRITON_DIR}/backends"
+LOG_VERBOSE=1
+STORE_KV="${DYN_STORE_KV:-file}"  # Default to file-based KV (no etcd required)
+# Parse command line arguments
+EXTRA_ARGS=()
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --model-name)
+            MODEL_NAME="$2"
+            shift 2
+            ;;
+        --model-repository)
+            MODEL_REPO="$2"
+            shift 2
+            ;;
+        --backend-directory)
+            BACKEND_DIR="$2"
+            shift 2
+            ;;
+        --log-verbose)
+            LOG_VERBOSE="$2"
+            shift 2
+            ;;
+        --store-kv)
+            STORE_KV="$2"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Launch Triton Server backend with Dynamo frontend"
+            echo ""
+            echo "Options:"
+            echo "  --model-name <name>         Model name to load (default: $MODEL_NAME)"
+            echo "  --model-repository <path>   Path to model repository (default: $MODEL_REPO)"
+            echo "  --backend-directory <path>  Path to Triton backends (default: $BACKEND_DIR)"
+            echo "  --log-verbose <level>       Triton log verbosity 0-6 (default: $LOG_VERBOSE)"
+            echo "  --store-kv <backend>        KV store backend: file, etcd, mem (default: $STORE_KV)"
+            echo "  -h, --help                  Show this help message"
+            echo ""
+            echo "Environment variables:"
+            echo "  DYN_STORE_KV     KV store backend (default: file)"
+            echo "  DYN_HTTP_PORT    Frontend HTTP port (default: 8000)"
+            echo "  DYN_SYSTEM_PORT  Worker metrics port (default: 8081)"
+            echo ""
+            echo "Ports:"
+            echo "  HTTP:  8000 (configurable via DYN_HTTP_PORT)"
+            echo "  gRPC:  8787 (KServe gRPC for tensor models)"
+            echo ""
+            echo "Additional arguments will be passed to tritonworker.py"
+            exit 0
+            ;;
+        *)
+            EXTRA_ARGS+=("$1")
+            shift
+            ;;
+    esac
+done
+# Both directories must exist before anything is started; report the first
+# missing one and stop.
+[[ -d "$MODEL_REPO" ]] || { echo "Error: Model repository not found: $MODEL_REPO"; exit 1; }
+[[ -d "$BACKEND_DIR" ]] || { echo "Error: Backend directory not found: $BACKEND_DIR"; exit 1; }
+# Summary of the effective settings, followed by an empty line.
+printf '%s\n' \
+    "=== Triton Server with Dynamo ===" \
+    "Model name:       $MODEL_NAME" \
+    "Model repository: $MODEL_REPO" \
+    "Backend directory: $BACKEND_DIR" \
+    "Log verbose:      $LOG_VERBOSE" \
+    "KV store:         $STORE_KV" \
+    ""
+# Set library path for Triton
+# Prepends the example's lib/ directory and the backend directory to any
+# LD_LIBRARY_PATH that is already set.
+export LD_LIBRARY_PATH="${TRITON_DIR}/lib:${BACKEND_DIR}:${LD_LIBRARY_PATH:-}"
+# Export KV store setting for worker (read by @dynamo_worker decorator)
+export DYN_STORE_KV="$STORE_KV"
+# Run frontend in background
+# --kserve-grpc-server enables the KServe gRPC endpoint for tensor models
+echo "Starting Dynamo frontend..."
+python3 -m dynamo.frontend --kserve-grpc-server --store-kv "$STORE_KV" &
+# Remember the frontend's PID so the cleanup trap can stop it on exit.
+FRONTEND_PID=$!
+# Give frontend time to start
+# (fixed delay; the script does not check that the frontend is actually up)
+sleep 2
+# Run triton worker in foreground
+# DYN_SYSTEM_PORT is set for the worker process only and falls back to 8081
+# when it is not already set; EXTRA_ARGS carries the unrecognised options.
+echo "Starting Triton worker..."
+DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
+python3 "${TRITON_DIR}/src/tritonworker.py" \
+    --model-name "$MODEL_NAME" \
+    --model-repository "$MODEL_REPO" \
+    --backend-directory "$BACKEND_DIR" \
+    --log-verbose "$LOG_VERBOSE" \
+    "${EXTRA_ARGS[@]}"
--- a/examples/backends/tritonserver/model_repo/identity/1/.gitkeep
+++ b/examples/backends/tritonserver/model_repo/identity/1/.gitkeep
--- a/examples/backends/tritonserver/model_repo/identity/config.pbtxt
+++ b/examples/backends/tritonserver/model_repo/identity/config.pbtxt
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Sample model served by the identity backend: one INT32 input tensor and one
+# INT32 output tensor with the same declared dimensions.
+name: "identity"
+backend: "identity"
+# NOTE(review): with max_batch_size > 0 Triton is documented to treat the
+# first tensor dimension as the batch dimension - confirm against the Triton
+# model configuration docs. The sample client's default shape is [1, 5].
+max_batch_size: 1
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_INT32
+    # NOTE(review): -1 presumably marks a variable-size dimension - confirm.
+    dims: [-1]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_INT32
+    dims: [-1]
+  }
+]
\ No newline at end of file
--- a/examples/backends/tritonserver/src/client.py
+++ b/examples/backends/tritonserver/src/client.py
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Test client for Triton Server backend via Dynamo KServe gRPC frontend.
+Usage:
+    # After starting the server with ./launch/identity.sh
+    python src/client.py
+    python src/client.py --model identity --shape 1 10
+"""
+import argparse
+import numpy as np
+import tritonclient.grpc as triton_grpc
+from tritonclient.utils import InferenceServerException
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Send inference requests to Triton model via Dynamo frontend"
+    )
+    parser.add_argument(
+        "--host",
+        default="127.0.0.1",
+        help="Host serving the gRPC endpoint (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8787,
+        help="Port of the gRPC endpoint (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--model",
+        default="identity",
+        help="Model name to target (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--shape",
+        type=int,
+        nargs="+",
+        default=[1, 5],
+        help="Input tensor shape (default: 1 5)",
+    )
+    parser.add_argument(
+        "--iterations",
+        type=int,
+        default=1,
+        help="Number of inference iterations (default: %(default)s)",
+    )
+    args = parser.parse_args()
+    target = f"{args.host}:{args.port}"
+    print(f"Connecting to {target}...")
+    try:
+        client = triton_grpc.InferenceServerClient(url=target)
+    except Exception as e:
+        print(f"Failed to connect: {e}")
+        return
+    # Query model metadata
+    print(f"\nQuerying model '{args.model}' metadata...")
+    try:
+        metadata = client.get_model_metadata(args.model)
+        print(f"  Name: {metadata.name}")
+        print(
+            f"  Inputs: {[(i.name, i.datatype, list(i.shape)) for i in metadata.inputs]}"
+        )
+        print(
+            f"  Outputs: {[(o.name, o.datatype, list(o.shape)) for o in metadata.outputs]}"
+        )
+    except InferenceServerException as e:
+        print(f"  Could not get metadata: {e}")
+        print("  Proceeding with default INPUT0/OUTPUT0...")
+    # Generate input data
+    shape = args.shape
+    input_size = int(np.prod(shape))
+    input_data = np.arange(1, input_size + 1, dtype=np.int32).reshape(shape)
+    print(f"\nRunning {args.iterations} inference iteration(s)...")
+    for i in range(args.iterations):
+        print(f"\n--- Iteration {i + 1} ---")
+        print(f"Input shape: {shape}")
+        print(f"Input data:\n{input_data}")
+        # Create input tensor
+        input_tensor = triton_grpc.InferInput("INPUT0", shape, "INT32")
+        input_tensor.set_data_from_numpy(input_data)
+        try:
+            response = client.infer(args.model, inputs=[input_tensor])
+            # Extract output
+            output_data = response.as_numpy("OUTPUT0")
+            print(f"Output shape: {output_data.shape}")
+            print(f"Output data:\n{output_data}")
+            # Verify identity model (output should equal input)
+            if np.array_equal(input_data, output_data):
+                print("✓ Identity verification passed")
+            else:
+                print("✗ Identity verification failed - output differs from input")
+        except InferenceServerException as e:
+            print(f"Inference failed: {e}")
+    print("\nDone.")
+if __name__ == "__main__":
+    main()
--- a/examples/backends/tritonserver/src/tritonworker.py
+++ b/examples/backends/tritonserver/src/tritonworker.py
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+import argparse
+import asyncio
+import logging
+import os
+from collections.abc import AsyncIterator
+import numpy as np
+import tritonclient.grpc.model_config_pb2 as mc
+import tritonserver
+import uvloop
+from google.protobuf import text_format
+from tritonclient.utils import triton_to_np_dtype
+from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm
+from dynamo.runtime import DistributedRuntime, dynamo_worker
+from dynamo.runtime.logging import configure_dynamo_logging
+logger = logging.getLogger(__name__)
+configure_dynamo_logging(service_name="tritonserver", worker_id=0)
+# Mapping from Triton dtype (uppercase) to Dynamo dtype (camelCase)
+TRITON_TO_DYNAMO_DTYPE = {
+    "BOOL": "Bool",
+    "UINT8": "Uint8",
+    "UINT16": "Uint16",
+    "UINT32": "Uint32",
+    "UINT64": "Uint64",
+    "INT8": "Int8",
+    "INT16": "Int16",
+    "INT32": "Int32",
+    "INT64": "Int64",
+    "FP16": "Float16",
+    "FP32": "Float32",
+    "FP64": "Float64",
+    "BYTES": "Bytes",
+}
+class RequestHandler:
+    def __init__(self, tritonserver: tritonserver.Server, model: tritonserver.Model):
+        self.tritonserver = tritonserver
+        self.model = model
+    async def generate(self, request: dict) -> dict:
+        # Deserialize to numpy array
+        logger.debug(f"Received request: {request}")
+        inference_request = self.model.create_request()
+        for tensor in request["tensors"]:
+            logger.debug(f"Tensor: {tensor}")
+            # Convert Triton dtype string ("INT32") to NumPy dtype (np.int32) for array construction
+            np_dtype = triton_to_np_dtype(tensor["metadata"]["data_type"].upper())
+            arr = np.array(tensor["data"]["values"], dtype=np_dtype).reshape(
+                tensor["metadata"]["shape"]
+            )
+            inference_request.inputs[tensor["metadata"]["name"]] = arr
+        inference_responses = self.model.async_infer(inference_request)
+        async for inference_response in inference_responses:
+            response_tensors = []
+            for output in self.model.metadata()["outputs"]:
+                output_data = np.from_dlpack(inference_response.outputs[output["name"]])
+                response_arr = output_data
+                # Convert Triton dtype (e.g., "INT32") to Dynamo dtype (e.g., "Int32")
+                dtype_str = TRITON_TO_DYNAMO_DTYPE.get(
+                    output["datatype"], output["datatype"]
+                )
+                response_tensors.append(
+                    {
+                        "metadata": {
+                            "name": output["name"],
+                            "shape": list(response_arr.shape),
+                            "data_type": dtype_str,
+                        },
+                        "data": {
+                            "data_type": dtype_str,
+                            "values": response_arr.flatten().tolist(),
+                        },
+                    }
+                )
+            response = {
+                "id": inference_response.request_id,
+                "model": inference_response.model.name,
+                "tensors": response_tensors,
+            }
+            yield response
+@dynamo_worker()
+async def triton_worker(runtime: DistributedRuntime, args: argparse.Namespace):
+    logger.info("=" * 60)
+    logger.info("Starting Triton Worker for Dynamo")
+    logger.info("=" * 60)
+    logger.info(
+        f"Environment: ETCD_ENDPOINTS={os.environ.get('ETCD_ENDPOINTS', 'NOT SET')}"
+    )
+    logger.info(f"Environment: NATS_SERVER={os.environ.get('NATS_SERVER', 'NOT SET')}")
+    logger.info(
+        f"Environment: DYN_STORE_KV={os.environ.get('DYN_STORE_KV', 'NOT SET')}"
+    )
+    component = runtime.namespace("triton").component("tritonserver")
+    logger.info("✓ Created component: triton/tritonserver")
+    endpoint = component.endpoint("generate")
+    logger.info("✓ Created endpoint: triton/tritonserver/generate")
+    model_repository = args.model_repository
+    model_name = args.model_name
+    backend_dir = args.backend_directory
+    logger.info(
+        f"Initializing Triton Server with model_repository={model_repository}, backend_dir={backend_dir}"
+    )
+    server = tritonserver.Server(
+        model_repository=model_repository,
+        backend_directory=backend_dir,
+        log_verbose=args.log_verbose,
+        model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
+    )
+    server.start(wait_until_ready=True)
+    logger.info("✓ Triton Server started")
+    server.load(model_name)
+    model = server.model(model_name)
+    logger.info(f"✓ Model '{model_name}' loaded")
+    # Read Triton model config from config.pbtxt
+    config_path = f"{model_repository}/{model_name}/config.pbtxt"
+    with open(config_path, "r") as f:
+        triton_model_config = text_format.Parse(f.read(), mc.ModelConfig())
+    logger.info(f"Loaded model config from {config_path}")
+    # Set up model metadata for KServe frontend
+    model_config = {
+        "name": "",
+        "inputs": [],
+        "outputs": [],
+        "triton_model_config": triton_model_config.SerializeToString(),
+    }
+    runtime_config = ModelRuntimeConfig()
+    runtime_config.set_tensor_model_config(model_config)
+    logger.info("Attempting to register model with Dynamo runtime...")
+    # Use register_llm for tensor-based models (skips HuggingFace downloads)
+    await register_llm(
+        ModelInput.Tensor,
+        ModelType.TensorBased,
+        endpoint,
+        model_name,  # model_path (used as display name for tensor-based models)
+        runtime_config=runtime_config,
+    )
+    logger.info(
+        f"✓ Successfully registered model '{model_name}' with endpoint triton/tritonserver/generate"
+    )
+    # Create handler and serve the endpoint
+    handler = RequestHandler(server, model)
+    logger.info("Starting to serve the endpoint...")
+    await endpoint.serve_endpoint(handler.generate)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Triton worker for Dynamo")
+    parser.add_argument(
+        "--model-repository",
+        type=str,
+        default="model_repo",
+        help="Model repository directory",
+    )
+    parser.add_argument("--model-name", type=str, default="identity", help="Model name")
+    parser.add_argument(
+        "--backend-directory", type=str, default="backends", help="Backend directory"
+    )
+    parser.add_argument("--log-verbose", type=int, default=6, help="Log verbose level")
+    args = parser.parse_args()
+    uvloop.install()
+    asyncio.run(triton_worker(args))