Unverified Commit 39d645e5 authored by Jonathan Tong's avatar Jonathan Tong Committed by GitHub
Browse files

docs: migrate Fern docs from fern/ into docs/ (#6206)


Signed-off-by: default avatarJont828 <jt572@cornell.edu>
parent d381e6ff
......@@ -56,7 +56,7 @@ fi
if ! kubectl get pods -n "$NAMESPACE" | grep -q "dynamo-platform"; then
warn "Dynamo platform pods not found in namespace $NAMESPACE"
warn "Please ensure Dynamo Kubernetes Platform is installed first:"
warn " See: docs/kubernetes/installation_guide.md"
warn " See: docs/pages/kubernetes/installation-guide.md"
if [[ -z "${FORCE:-}" && -z "${YES:-}" ]]; then
read -p "Continue anyway? [y/N]: " -r ans
[[ "$ans" =~ ^[Yy]$ ]] || exit 1
......
# SPDX-FileCopyrightText: Copyright (c) 2022-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# (The first two use `?=`, so a value already present in the environment
# takes precedence over the default given here.)
# Extra options passed to sphinx-build; the default -W turns warnings into errors.
SPHINXOPTS ?= -W
# Sphinx build executable to invoke.
SPHINXBUILD ?= sphinx-build
# Directory containing the documentation sources (this directory).
SOURCEDIR = .
# Directory that receives build output; it is removed by the `clean` target.
BUILDDIR = build
##@ General
# Put it first so that "make" without argument is like "make help".
# The recipe first prints Sphinx's own target list (sphinx-build -M help),
# then runs awk over every file in $(MAKEFILE_LIST): each "target: ... ## text"
# line is listed as a TARGET/DESCRIPTION row and each "##@ Section" comment is
# printed as a bold heading.
help: ## Display help for all targets
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@echo ""
@echo "Additional documentation targets:"
@awk 'BEGIN {FS = ":.*##"; printf " \033[36m%-20s\033[0m %s\n", "TARGET", "DESCRIPTION"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST)
# Delete the Sphinx output directory together with everything inside it.
clean: ## Clean build artifacts
	@rm -rf $(BUILDDIR)
##@ Helm Documentation
## Location to install dependencies to
# $(CURDIR) is make's own working directory. Unlike $(shell pwd) it does not
# fork a shell every time this recursively-expanded (?=) variable is referenced.
LOCALBIN ?= $(CURDIR)/bin
# Create the tool directory on demand.
$(LOCALBIN):
	mkdir -p $@
## Tool Versions
HELM_DOCS_VERSION ?= 1.14.2
## Tool Binaries
HELM_DOCS ?= $(LOCALBIN)/helm-docs-$(HELM_DOCS_VERSION)
.PHONY: helm-docs-install
helm-docs-install: $(HELM_DOCS) ## Download helm-docs locally if necessary
# Download the helm-docs release tarball matching this host (OS and
# architecture are taken from uname), unpack the binary into $(LOCALBIN) and
# rename it to the version-suffixed path held in $(HELM_DOCS).
#
# $(LOCALBIN) is an order-only prerequisite (after the `|`): the directory must
# exist, but its timestamp must not count. A directory's mtime changes whenever
# an entry is added to it, so as a normal prerequisite it makes the installed
# binary look out of date and forces a fresh download.
#
# curl runs with -f so that an HTTP error aborts with curl's own message instead
# of an error page being piped into tar.
#
# NOTE(review): the published helm-docs asset names appear to use title-cased OS
# names (e.g. "Linux") and "arm64" rather than "aarch64" - confirm that this URL
# resolves on every supported host.
$(HELM_DOCS): | $(LOCALBIN)
	@echo "📥 Downloading helm-docs $(HELM_DOCS_VERSION)..."
	@ARCH=$$(uname -m); \
	OS=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
	curl -fsSL "https://github.com/norwoodj/helm-docs/releases/download/v$(HELM_DOCS_VERSION)/helm-docs_$(HELM_DOCS_VERSION)_$${OS}_$${ARCH}.tar.gz" | \
	tar xz -C $(LOCALBIN) helm-docs && \
	mv $(LOCALBIN)/helm-docs $@ && \
	echo "✅ helm-docs $(HELM_DOCS_VERSION) installed successfully"
# Render README.md for the platform chart from README.md.gotmpl and values.yaml.
# helm-docs is referenced through $(realpath ...) so that the path still
# resolves after the recipe has changed into the chart directory.
.PHONY: generate-helm-docs
generate-helm-docs: helm-docs-install ## Generate README.md for Helm charts from values.yaml
	@echo "📚 Generating Helm chart documentation..."
	@cd ../deploy/helm/charts/platform && \
		$(realpath $(HELM_DOCS)) \
			--template-files=README.md.gotmpl \
			--output-file=README.md \
			--sort-values-order=file \
			--chart-to-generate=. \
			--ignore-non-descriptions
	@echo "✅ Generated documentation at ../deploy/helm/charts/platform/README.md"
# Delete the README.md that generate-helm-docs writes into the platform chart.
.PHONY: helm-docs-clean
helm-docs-clean: ## Remove generated helm documentation
	@echo "🧹 Cleaning generated helm documentation..." && \
		rm -f ../deploy/helm/charts/platform/README.md && \
		echo "✅ Cleaned helm documentation"
# Delegate to the operator makefile, which owns the generate-api-docs target.
# The sub-make is started through $(MAKE) rather than a literal `make` so that
# command-line flags, the -j jobserver and dry-run mode (-n) are passed down.
.PHONY: generate-crd-docs
generate-crd-docs: ## Generate CRD API reference documentation
	@echo "📚 Generating CRD API reference documentation..."
	@$(MAKE) -C ../deploy/operator generate-api-docs
	@echo "✅ CRD API reference generated"
# Aggregate target: builds the Helm chart README, the CRD API reference and the
# Sphinx HTML output. `html` has no rule of its own in this makefile, so it is
# handled by the catch-all rule at the bottom.
.PHONY: docs-all
docs-all: generate-helm-docs generate-crd-docs html ## Generate all documentation (Sphinx + Helm + CRDs)
# Names that make must always treat as commands, never as files on disk.
.PHONY: help Makefile clean
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# The requested target name ($@) is passed to sphinx-build as the -M builder.
%:
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
---
orphan: true
---
# Building Documentation
This directory contains the documentation source files for NVIDIA Dynamo.
## Prerequisites
- Python 3.11 or later
- [uv](https://docs.astral.sh/uv/) package manager
## Build Instructions
### Option 1: Dedicated Docs Environment (Recommended)
This approach builds the docs without requiring the full project dependencies (including `ai-dynamo-runtime`):
```bash
# One-time setup: Create docs environment and install dependencies
uv venv .venv-docs
uv pip install --python .venv-docs --group docs
# Generate documentation
uv run --python .venv-docs --no-project docs/generate_docs.py
```
The generated HTML will be available in `docs/build/html/`.
### Option 2: Using Full Development Environment
If you already have the full project dependencies installed (i.e., you're actively developing the codebase), you can use `uv run` directly:
```bash
uv run --group docs docs/generate_docs.py
```
This will use your existing project environment and add the docs dependencies.
### Option 3: Using Docker
Build the docs in a Docker container with all dependencies isolated:
```bash
docker build -f container/Dockerfile.docs -t dynamo-docs .
```
The documentation will be built inside the container. To extract the built docs:
```bash
# Run the container and copy the output
docker run --rm -v $(pwd)/docs/build:/workspace/dynamo/docs/build dynamo-docs
# Or create a container to copy files from
docker create --name temp-docs dynamo-docs
docker cp temp-docs:/workspace/dynamo/docs/build ./docs/build
docker rm temp-docs
```
This approach is ideal for CI/CD pipelines or when you want complete isolation from your local environment.
## Directory Structure
- `docs/` - Documentation source files (Markdown and reStructuredText)
- `docs/conf.py` - Sphinx configuration
- `docs/_static/` - Static assets (CSS, JS, images)
- `docs/_extensions/` - Custom Sphinx extensions
- `docs/build/` - Generated documentation output (not tracked in git)
## Redirect Creation
When moving or renaming files a redirect must be created.
Redirect entries should be added to the `redirects` dictionary in `conf.py`. For detailed information on redirect syntax, see the [sphinx-reredirects usage documentation](https://documatt.com/sphinx-reredirects/usage/#introduction).
## Dependency Management
Documentation dependencies are defined in `pyproject.toml` under the `[dependency-groups]` section:
```toml
[dependency-groups]
docs = [
"sphinx>=8.1",
"nvidia-sphinx-theme>=0.0.8",
# ... other doc dependencies
]
```
## Troubleshooting
### Build Warnings
The build process treats warnings as errors. Common issues:
- **Missing toctree entries**: Documents must be referenced in a table of contents
- **Non-consecutive headers**: Don't skip header levels (e.g., H1 → H3)
- **Broken links**: Ensure all internal and external links are valid
### Missing Dependencies
If you encounter import errors, ensure the docs dependencies are installed:
```bash
uv pip install --python .venv-docs --group docs
```
## Viewing the Documentation
After building, open `docs/build/html/index.html` in your browser, or use Python's built-in HTTP server:
```bash
cd docs/build/html
python -m http.server 8000
# Then visit http://localhost:8000 in your browser
```
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Custom Sphinx extensions for Dynamo documentation.
"""
# Version string for this package of custom Sphinx extensions.
__version__ = "0.1.0"
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
AST-based Sphinx extension to convert GitHub-flavored markdown alerts to MyST admonitions.
This extension works on the parsed document AST, making it more robust than text preprocessing.
It finds blockquote nodes that match GitHub alert patterns and replaces them with admonition nodes.
"""
import re
from typing import Any, Dict
from docutils import nodes
from sphinx.application import Sphinx
from sphinx.util import logging
# Extension version; reported to Sphinx in the metadata dict returned by setup().
__version__ = "0.2.0"
# Set up logger for the extension
logger = logging.getLogger(__name__)
# Log when the extension module is imported
# (this runs once, as a side effect of importing the module)
logger.info(f"GitHub alerts extension v{__version__} imported successfully")
class GitHubAlertsTransformer:
    """AST transformer for GitHub alerts to MyST admonitions.

    A GitHub alert is a blockquote whose first paragraph begins with a
    ``[!TYPE]`` marker, for example ``> [!NOTE]``.  Text that follows the
    marker inside the same text node is used as the admonition title; all
    remaining content of the blockquote becomes the admonition body.
    """

    # Mapping of GitHub alert types (lower-cased) to docutils admonition node
    # classes.  Types that are not listed fall back to ``nodes.note``.
    ALERT_MAPPING = {
        "note": nodes.note,
        "tip": nodes.tip,
        "important": nodes.important,
        "warning": nodes.warning,
        "caution": nodes.caution,
        "danger": nodes.danger,
        "info": nodes.note,  # Map info to note
        "hint": nodes.tip,  # Map hint to tip
    }

    def __init__(self):
        # Regex to match GitHub alert syntax in text
        # Uses [^\]]* instead of .*? to prevent backtracking on ] characters
        # Group 1 is the alert type, optional group 2 is the title text.
        self.alert_pattern = re.compile(r"^\[!([^\]]*)\](?:\s+(.*))?$")

    def _match_alert_marker(self, node):
        """Return the ``[!TYPE]`` regex match for ``node``, or ``None`` if it is not an alert."""
        if not isinstance(node, nodes.block_quote):
            return None

        # GitHub alerts start with a paragraph containing [!TYPE]
        if not node.children or not isinstance(node.children[0], nodes.paragraph):
            return None

        first_para = node.children[0]
        if not first_para.children or not isinstance(
            first_para.children[0], nodes.Text
        ):
            return None

        return self.alert_pattern.match(first_para.children[0].astext().strip())

    def is_github_alert_blockquote(self, node: nodes.block_quote) -> bool:
        """
        Check if a blockquote node represents a GitHub alert.

        Args:
            node: The node to inspect; anything other than a blockquote is rejected

        Returns:
            bool: True if this is a GitHub alert blockquote, False otherwise
        """
        return self._match_alert_marker(node) is not None

    def create_admonition_node(self, blockquote: nodes.block_quote) -> nodes.admonition:
        """
        Create a docutils admonition node from a GitHub alert blockquote.

        The children of ``blockquote`` are re-parented into the new node, so the
        blockquote should be discarded (replaced) afterwards.

        Args:
            blockquote: The blockquote node containing the GitHub alert

        Returns:
            The created admonition node: an instance of the class that
            ``ALERT_MAPPING`` assigns to the alert type (``nodes.note`` for
            unknown types)

        Raises:
            ValueError: If ``blockquote`` does not start with a ``[!TYPE]`` marker
        """
        match = self._match_alert_marker(blockquote)
        if not match:
            raise ValueError("Not a valid GitHub alert blockquote")

        alert_type = match.group(1).lower().strip()
        title = match.group(2).strip() if match.group(2) else None
        first_para = blockquote.children[0]

        # Map to MyST admonition type
        admonition_class = self.ALERT_MAPPING.get(alert_type, nodes.note)
        admonition = admonition_class()

        # Add title if present
        if title:
            admonition.append(nodes.title(title, title))

        # Inline nodes that follow the marker text inside the first paragraph
        # are body content whether or not a title was given.
        if len(first_para.children) > 1:
            lead_para = nodes.paragraph()
            for child in first_para.children[1:]:
                child.parent = None  # Detach from current parent
                lead_para.append(child)
            admonition.append(lead_para)

        # Add any additional paragraphs/content
        for child in blockquote.children[1:]:
            child.parent = None  # Detach from current parent
            admonition.append(child)

        return admonition

    def transform_document(self, document: nodes.document) -> None:
        """Transform all GitHub alert blockquotes in the document."""
        # findall() replaces the deprecated traverse() and yields lazily, so
        # take a snapshot before the tree is modified.
        for blockquote in list(document.findall(nodes.block_quote)):
            if self.is_github_alert_blockquote(blockquote):
                # Create admonition node from blockquote
                admonition = self.create_admonition_node(blockquote)
                # Replace blockquote with admonition
                blockquote.parent.replace(blockquote, admonition)
def transform_github_alerts(app: Sphinx, doctree: nodes.document, docname: str) -> None:
    """
    Transform GitHub alerts in the document tree.

    This function is connected to Sphinx's 'doctree-resolved' event.  It does
    nothing for documents whose source file is not Markdown, or when the
    ``github_alerts_enabled`` config value is false.

    Args:
        app: The Sphinx application instance
        doctree: The document tree to transform
        docname: The document name being processed

    Raises:
        Exception: Any error raised during the transformation is logged and
            re-raised unchanged.
    """
    # Check if this is a markdown file by looking at the source file
    # Sphinx strips extensions from docnames, so we need to check the source
    source_file = app.env.doc2path(docname, base=None)
    is_markdown = source_file and source_file.suffix in (".md", ".markdown")
    if not is_markdown:
        return

    # Check if the extension is enabled
    if not app.config.github_alerts_enabled:
        return

    logger.debug(f"Processing GitHub alerts in {docname}")

    try:
        # One transformer is created lazily and cached on the app object.
        transformer = getattr(app, "_github_alerts_transformer", None)
        if transformer is None:
            transformer = GitHubAlertsTransformer()
            app._github_alerts_transformer = transformer

        # Count blockquotes before transformation.  findall() replaces the
        # deprecated traverse() and is lazy, hence the list() snapshots.
        initial_blockquotes = list(doctree.findall(nodes.block_quote))
        initial_admonitions = list(doctree.findall(nodes.Admonition))
        alert_blockquotes = [
            bq
            for bq in initial_blockquotes
            if transformer.is_github_alert_blockquote(bq)
        ]

        if alert_blockquotes:
            logger.info(
                f"GitHub alerts: Converting {len(alert_blockquotes)} alert(s) in {docname}"
            )

            # Transform the document
            transformer.transform_document(doctree)

            # Count remaining blockquotes and new admonitions for verification
            remaining_blockquotes = list(doctree.findall(nodes.block_quote))
            remaining_admonitions = list(doctree.findall(nodes.Admonition))
            created = len(remaining_admonitions) - len(initial_admonitions)

            # Before/after counts are separated by "->"; without a separator
            # the two numbers ran together into one.
            logger.debug(
                f"GitHub alerts: {docname} - {len(initial_blockquotes)} -> {len(remaining_blockquotes)} blockquotes, {created} admonitions created"
            )
        else:
            logger.debug(f"GitHub alerts: No alerts found in {docname}")

    except Exception as e:
        logger.error(f"GitHub alerts: Error processing {docname}: {e}")
        raise
def setup(app: Sphinx) -> Dict[str, Any]:
    """
    Register the GitHub alerts extension with Sphinx.

    Hooks ``transform_github_alerts`` into the 'doctree-resolved' event and
    declares the ``github_alerts_enabled`` config value (default ``True``,
    rebuild scope "env").

    Args:
        app: The Sphinx application instance

    Returns:
        Extension metadata: the version and the parallel read/write safety flags

    Raises:
        Exception: Any error raised while registering is logged and re-raised.
    """
    logger.info("GitHub alerts extension setup() called")

    metadata: Dict[str, Any] = {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }

    try:
        # The event fires once a doctree has been resolved, i.e. after
        # parsing and before the writer runs.
        app.connect("doctree-resolved", transform_github_alerts)
        logger.info("GitHub alerts extension connected to 'doctree-resolved' event")

        # Config switch consulted by transform_github_alerts
        app.add_config_value("github_alerts_enabled", True, "env")
        logger.info("GitHub alerts extension setup completed")
    except Exception as exc:
        logger.error(f"GitHub alerts extension setup failed: {exc}")
        raise

    return metadata
The examples below assume you build the latest image yourself from source. If using a prebuilt image follow the examples from the corresponding branch.
.. grid:: 1 2 2 2
:gutter: 3
:margin: 0
:padding: 3 4 0 0
.. grid-item-card:: :doc:`Hello World <../examples/runtime/hello_world/README>`
:link: ../examples/runtime/hello_world/README
:link-type: doc
Demonstrates the basic concepts of Dynamo by creating a simple GPU-unaware graph
.. grid-item-card:: :doc:`vLLM <../backends/vllm/README>`
:link: ../backends/vllm/README
:link-type: doc
Presents examples and reference implementations for deploying Large Language Models (LLMs) in various configurations with vLLM.
.. grid-item-card:: :doc:`SGLang <../backends/sglang/README>`
:link: ../backends/sglang/README
:link-type: doc
Presents examples and reference implementations for deploying Large Language Models (LLMs) in various configurations with SGLang.
.. grid-item-card:: :doc:`TensorRT-LLM <../backends/trtllm/README>`
:link: ../backends/trtllm/README
:link-type: doc
Presents examples and reference implementations for deploying Large Language Models (LLMs) in various configurations with TensorRT-LLM.
..
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
All rights reserved.
SPDX-License-Identifier: Apache-2.0
This guide covers running Dynamo **using the CLI on your local machine or VM**.
.. important::
**Looking to deploy on Kubernetes instead?**
See the `Kubernetes Installation Guide <../kubernetes/installation_guide.html>`_
and `Kubernetes Quickstart <../kubernetes/README.html>`_ for cluster deployments.
**Install Dynamo**
**Option A: Containers (Recommended)**
Containers have all dependencies pre-installed. No setup required.
.. code-block:: bash
# SGLang
docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.1
# TensorRT-LLM
docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1
# vLLM
docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.1
.. tip::
To run frontend and worker in the same container, either:
- Run processes in background with ``&`` (see Run Dynamo section below), or
- Open a second terminal and use ``docker exec -it <container_id> bash``
See `Release Artifacts <../reference/release-artifacts.html#container-images>`_ for available
versions and backend guides for run instructions: `SGLang <../backends/sglang/README.html>`_ |
`TensorRT-LLM <../backends/trtllm/README.html>`_ | `vLLM <../backends/vllm/README.html>`_
**Option B: Install from PyPI**
.. code-block:: bash
# Install uv (recommended Python package manager)
curl -LsSf https://astral.sh/uv/install.sh | sh
# Create virtual environment
uv venv venv
source venv/bin/activate
uv pip install pip
Install system dependencies and the Dynamo wheel for your chosen backend:
**SGLang**
.. code-block:: bash
sudo apt install python3-dev
uv pip install --prerelease=allow "ai-dynamo[sglang]"
.. note::
For CUDA 13 (B300/GB300), the container is recommended. See
`SGLang install docs <https://docs.sglang.io/get_started/install.html>`_ for details.
**TensorRT-LLM**
.. code-block:: bash
sudo apt install python3-dev
pip install torch==2.9.0 torchvision --index-url https://download.pytorch.org/whl/cu130
pip install --pre --extra-index-url https://pypi.nvidia.com "ai-dynamo[trtllm]"
.. note::
TensorRT-LLM requires ``pip`` due to a transitive Git URL dependency that
``uv`` doesn't resolve. We recommend using the TensorRT-LLM container for
broader compatibility. See the `TRT-LLM backend guide <../backends/trtllm/README.html>`_
for details.
**vLLM**
.. code-block:: bash
sudo apt install python3-dev libxcb1
uv pip install --prerelease=allow "ai-dynamo[vllm]"
**Run Dynamo**
.. tip::
**(Optional)** Before running Dynamo, verify your system configuration:
``python3 deploy/sanity_check.py``
Start the frontend, then start a worker for your chosen backend.
.. tip::
To run in a single terminal (useful in containers), append ``> logfile.log 2>&1 &``
to run processes in background. Example: ``python3 -m dynamo.frontend --store-kv file > dynamo.frontend.log 2>&1 &``
.. code-block:: bash
# Start the OpenAI compatible frontend (default port is 8000)
# --store-kv file avoids needing etcd (frontend and workers must share a disk)
python3 -m dynamo.frontend --store-kv file
In another terminal (or same terminal if using background mode), start a worker:
**SGLang**
.. code-block:: bash
python3 -m dynamo.sglang --model-path Qwen/Qwen3-0.6B --store-kv file
**TensorRT-LLM**
.. code-block:: bash
python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --store-kv file
**vLLM**
.. code-block:: bash
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --store-kv file \
--kv-events-config '{"enable_kv_cache_events": false}'
.. note::
For dependency-free local development, disable KV event publishing (avoids NATS):
- **vLLM:** Add ``--kv-events-config '{"enable_kv_cache_events": false}'``
- **SGLang:** No flag needed (KV events disabled by default)
- **TensorRT-LLM:** No flag needed (KV events disabled by default)
**TensorRT-LLM only:** The warning ``Cannot connect to ModelExpress server/transport error. Using direct download.``
is expected and can be safely ignored.
**Test Your Deployment**
.. code-block:: bash
curl localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "Qwen/Qwen3-0.6B",
"messages": [{"role": "user", "content": "Hello!"}],
"max_tokens": 50}'
Backends
========
.. toctree::
:maxdepth: 1
vLLM <../backends/vllm/README>
SGLang <../backends/sglang/README>
TensorRT-LLM <../backends/trtllm/README>
\ No newline at end of file
..
Quickstart Page (left sidebar target)
..
Examples
========
.. include:: ../_includes/dive_in_examples.rst
\ No newline at end of file
// Add RunLLM widget
// Once the DOM has been parsed, build a <script type="module"> element that
// points at the RunLLM widget URL and append it to <head>.
document.addEventListener("DOMContentLoaded", function () {
  // Custom attributes set on the script element, applied in the order listed.
  var widgetAttributes = [
    ["version", "stable"],
    ["runllm-keyboard-shortcut", "Mod+j"], // cmd-j or ctrl-j to open the widget.
    ["runllm-name", "dynamo"],
    ["runllm-position", "BOTTOM_RIGHT"],
    ["runllm-position-y", "120px"],
    ["runllm-position-x", "20px"],
    ["runllm-assistant-id", "758"],
  ];

  var widgetScript = document.createElement("script");
  widgetScript.type = "module";
  widgetScript.id = "runllm-widget-script";
  widgetScript.src = "https://widget.runllm.com";

  widgetAttributes.forEach(function (pair) {
    widgetScript.setAttribute(pair[0], pair[1]);
  });

  widgetScript.async = true;
  document.head.appendChild(widgetScript);
});
[
{
"name": "0.1.0 (current release)",
"version": "0.1.0",
"url": "https://docs.nvidia.com/dynamo/latest/index.html"
},
{
"name": "older releases",
"version": "archives",
"url": "https://docs.nvidia.com/dynamo/archives/"
}
]
\ No newline at end of file
# Tool Calling with Dynamo
You can connect Dynamo to external tools and services using function calling (also known as tool calling). By providing a list of available functions, Dynamo can choose
to output function arguments for the relevant function(s) which you can execute to augment the prompt with relevant external information.
Tool calling (AKA function calling) is controlled using the `tool_choice` and `tools` request parameters.
## Prerequisites
To enable this feature, you should set the following flag while launching the backend worker
- `--dyn-tool-call-parser` : select the parser from the available parsers list using the below command
```bash
# <backend> can be vllm, sglang, trtllm, etc. based on your installation
python -m dynamo.<backend> --help
```
> [!NOTE]
> If no tool call parser is provided by the user, Dynamo will try to use default tool call parsing based on `<TOOLCALL>` and `<|python_tag|>` tool tags.
> [!TIP]
> If your model's default chat template doesn't support tool calling, but the model itself does, you can specify a custom chat template per worker
> with `python -m dynamo.<backend> --custom-jinja-template </path/to/template.jinja>`.
Parser to Model Mapping
| Parser Name | Supported Models |
|-------------|-----------------------------------------------------------------------|
| hermes | Qwen/Qwen2.5-*, Qwen/QwQ-32B, NousResearch/Hermes-2-Pro-*, NousResearch/Hermes-2-Theta-*, NousResearch/Hermes-3-* |
| mistral | mistralai/Mistral-7B-Instruct-v0.3, Additional mistral function-calling models are compatible as well.|
| llama3_json | meta-llama/Llama-3.1-*, meta-llama/Llama-3.2-* |
| harmony | openai/gpt-oss-* |
| nemotron_deci | nvidia/nemotron-* |
| phi4 | Phi-4-* |
| deepseek_v3 | deepseek-ai/DeepSeek-V3, deepseek-ai/DeepSeek-R1, deepseek-ai/DeepSeek-R1-0528 |
| deepseek_v3_1 | deepseek-ai/DeepSeek-V3.1 |
| pythonic | meta-llama/Llama-4-* |
| jamba | ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7 |
## Examples
### Launch Dynamo Frontend and Backend
```bash
# launch backend worker
python -m dynamo.vllm --model openai/gpt-oss-20b --dyn-tool-call-parser harmony
# launch frontend worker
python -m dynamo.frontend
```
### Tool Calling Request Examples
- Example 1
```python
from openai import OpenAI
import json
client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
def get_weather(location: str, unit: str):
return f"Getting the weather for {location} in {unit}..."
tool_functions = {"get_weather": get_weather}
tools = [{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
},
"required": ["location", "unit"]
}
}
}]
response = client.chat.completions.create(
model="openai/gpt-oss-20b",
messages=[{"role": "user", "content": "What's the weather like in San Francisco in Celsius?"}],
tools=tools,
tool_choice="auto",
max_tokens=10000
)
print(f"{response}")
tool_call = response.choices[0].message.tool_calls[0].function
print(f"Function called: {tool_call.name}")
print(f"Arguments: {tool_call.arguments}")
print(f"Result: {tool_functions[tool_call.name](**json.loads(tool_call.arguments))}")
```
- Example 2
```python
# Use tools defined in example 1
time_tool = {
"type": "function",
"function": {
"name": "get_current_time_nyc",
"description": "Get the current time in NYC.",
"parameters": {}
}
}
tools.append(time_tool)
messages = [
{"role": "user", "content": "What's the current time in New York?"}
]
response = client.chat.completions.create(
model="openai/gpt-oss-20b", #client.models.list().data[1].id,
messages=messages,
tools=tools,
tool_choice="auto",
max_tokens=100,
)
print(f"{response}")
tool_call = response.choices[0].message.tool_calls[0].function
print(f"Function called: {tool_call.name}")
print(f"Arguments: {tool_call.arguments}")
```
- Example 3
```python
tools = [
{
"type": "function",
"function": {
"name": "get_tourist_attractions",
"description": "Get a list of top tourist attractions for a given city.",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The name of the city to find attractions for.",
}
},
"required": ["city"],
},
},
},
]
def get_messages():
return [
{
"role": "user",
"content": (
"I'm planning a trip to Tokyo next week. what are some top tourist attractions in Tokyo? "
),
},
]
messages = get_messages()
response = client.chat.completions.create(
model="openai/gpt-oss-20b",
messages=messages,
tools=tools,
tool_choice="auto",
max_tokens=100,
)
print(f"{response}")
tool_call = response.choices[0].message.tool_calls[0].function
print(f"Function called: {tool_call.name}")
print(f"Arguments: {tool_call.arguments}")
```
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Dynamo NIXL Connect
Dynamo NIXL Connect specializes in moving data between models/workers in a Dynamo Graph, particularly in use cases where registration and memory regions need to be dynamic.
Dynamo connect provides utilities for such use cases, using the NIXL-based I/O subsystem via a set of Python classes.
The relaxed registration comes with some performance overheads, but simplifies the integration process.
Especially for larger data transfer operations, such as between models in a multi-model graph, the overhead would be marginal.
The `dynamo.nixl_connect` library can be imported by any Dynamo container hosted application.
> [!Note]
> Dynamo NIXL Connect will pick the best method of data transfer available to it.
> The available methods depend on the hardware and software configuration of the machines and network running the graph.
> GPU Direct RDMA operations require that both ends of the operation have:
> - NIC and GPU capable of performing RDMA operations
> - Device drivers that support GPU-NIC direct interactions (aka "zero copy") and RDMA operations
> - Network that supports InfiniBand or RoCE
>
> With any of the above not satisfied, GPU Direct RDMA will not be available to the graph's workers, and less-optimal methods will be utilized to ensure basic functionality.
> For additional information, please read this [GPUDirect RDMA](https://docs.nvidia.com/cuda/pdf/GPUDirect_RDMA.pdf) document.
```python
import dynamo.nixl_connect
```
All operations using the NIXL Connect library begin with the [`Connector`](connector.md) class and the type of operation required.
There are four types of supported operations:
1. **Register local readable memory**:
Register local memory buffer(s) with the NIXL subsystem to enable a remote worker to read from.
2. **Register local writable memory**:
Register local memory buffer(s) with the NIXL subsystem to enable a remote worker to write to.
3. **Read from registered, remote memory**:
Read remote memory buffer(s), registered by a remote worker to be readable, into local memory buffer(s).
4. **Write to registered, remote memory**:
Write local memory buffer(s) to remote memory buffer(s) registered by a remote worker to be writable.
When available, by connecting correctly paired operations, high-throughput GPU Direct RDMA data transfers can be completed.
Given the list above, the correct pairing of operations would be 1 & 3 or 2 & 4.
In each pairing, one side is a "(read|write)-able operation" and the other is its correctly paired "(read|write) operation".
Specifically, a read operation must be paired with a readable operation, and a write operation must be paired with a writable operation.
```mermaid
sequenceDiagram
participant LocalWorker
participant RemoteWorker
participant NIXL
LocalWorker ->> NIXL: Register memory (Descriptor)
RemoteWorker ->> NIXL: Register memory (Descriptor)
LocalWorker ->> LocalWorker: Create Readable/WritableOperation
LocalWorker ->> RemoteWorker: Send NIXL metadata (via HTTP/TCP+NATS)
RemoteWorker ->> NIXL: Begin Read/WriteOperation with metadata
NIXL -->> RemoteWorker: Data transfer
RemoteWorker -->> LocalWorker: Notify completion (unblock awaiter)
```
## Examples
### Generic Example
In the diagram below, Local creates a [`WritableOperation`](writable_operation.md) intended to receive data from Remote.
Local then sends metadata about the requested operation to Remote.
Remote then uses the metadata to create a [`WriteOperation`](write_operation.md) which will perform the GPU Direct RDMA memory transfer, when available, from Remote's GPU memory to Local's GPU memory.
```mermaid
---
title: Write Operation Between Two Workers (RDMA available)
---
flowchart LR
c1[Remote] --"3: .begin_write()"--- WriteOperation
WriteOperation e1@=="4: GPU Direct RDMA"==> WritableOperation
WritableOperation --"1: .create_writable()"--- c2[Local]
c2 e2@--"2: RDMA Metadata via HTTP"--> c1
e1@{ animate: true; }
e2@{ animate: true; }
```
> [!Note]
> When RDMA isn't available, the NIXL data transfer will still complete using non-accelerated methods.
### Multimodal Example
In the case of the [Dynamo Multimodal Disaggregated Example](../../features/multimodal/multimodal_vllm.md):
1. The HTTP frontend accepts a text prompt and a URL to an image.
2. The prompt and URL are then enqueued with the Processor before being dispatched to the first available Decode Worker.
3. Decode Worker then requests a Prefill Worker to provide key-value data for the LLM powering the Decode Worker.
4. Prefill Worker then requests that the image be processed and provided as embeddings by the Encode Worker.
5. Encode Worker acquires the image, processes it, performs inference on the image using a specialized vision model, and finally provides the embeddings to Prefill Worker.
6. Prefill Worker receives the embeddings from Encode Worker and generates a key-value cache (KV$) update for Decode Worker's LLM and writes the update directly to the GPU memory reserved for the data.
7. Finally, Decode Worker performs the requested inference.
```mermaid
---
title: Multimodal Disaggregated Workflow
---
flowchart LR
p0[HTTP Frontend] i0@--"text prompt"-->p1[Processor]
p0 i1@--"url"-->p1
p1 i2@--"prompt"-->dw[Decode Worker]
p1 i3@--"url"-->dw
dw i4@--"prompt"-->pw[Prefill Worker]
dw i5@--"url"-->pw
pw i6@--"url"-->ew[Encode Worker]
ew o0@=="image embeddings"==>pw
pw o1@=="kv_cache updates"==>dw
dw o2@--"inference results"-->p0
i0@{ animate: true; }
i1@{ animate: true; }
i2@{ animate: true; }
i3@{ animate: true; }
i4@{ animate: true; }
i5@{ animate: true; }
i6@{ animate: true; }
o0@{ animate: true; }
o1@{ animate: true; }
o2@{ animate: true; }
```
> [!Note]
> In this example, it is the data transfer between the Prefill Worker and the Encode Worker that utilizes the Dynamo NIXL Connect library.
> The KV Cache transfer between Decode Worker and Prefill Worker utilizes a different connector that also uses the NIXL-based I/O subsystem underneath.
#### Code Examples
See [MultimodalPDWorkerHandler](../../../components/src/dynamo/vllm/multimodal_handlers/worker_handler.py) or [MultimodalDecodeWorkerHandler](../../../components/src/dynamo/vllm/multimodal_handlers/worker_handler.py) from our Multimodal example,
for how they coordinate directly with the Encode Worker by creating a [`WritableOperation`](writable_operation.md),
sending the operation's metadata via Dynamo's round-robin dispatcher, and awaiting the operation for completion before making use of the transferred data.
See [MultimodalEncodeWorkerHandler](../../../components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py) from our Multimodal example,
for how the resulting embeddings are registered with the NIXL subsystem by creating a [`Descriptor`](descriptor.md),
a [`WriteOperation`](write_operation.md) is created using the metadata provided by the requesting worker,
and the worker awaits completion of the data transfer before yielding a response.
## Python Classes
- [Connector](connector.md)
- [Descriptor](descriptor.md)
- [Device](device.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
## References
- [NVIDIA Dynamo](https://developer.nvidia.com/dynamo) @ [GitHub](https://github.com/ai-dynamo/dynamo)
- [NVIDIA Inference Transfer Library (NIXL)](https://developer.nvidia.com/blog/introducing-nvidia-dynamo-a-low-latency-distributed-inference-framework-for-scaling-reasoning-ai-models/#nvidia_inference_transfer_library_nixl_low-latency_hardware-agnostic_communication%C2%A0) @ [GitHub](https://github.com/ai-dynamo/nixl)
- [Dynamo Multimodal Example](../../../examples/multimodal)
- [NVIDIA GPU Direct](https://developer.nvidia.com/gpudirect)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.Connector
Core class for managing the connection between workers in a distributed environment.
Use this class to create readable and writable operations, or read and write data to remote workers.
This class provides a "pythonic" interface that uses the NIXL library to perform data transfers, GPU Direct RDMA accelerated when available, between models hosted by different workers in a Dynamo graph.
The connector provides two methods of moving data between workers:
- Preparing local memory to be written to by a remote worker.
- Preparing local memory to be read by a remote worker.
In both cases, local memory is registered with the NIXL-based I/O subsystem via the [`Descriptor`](descriptor.md) class and provided to the connector.
When RDMA is available, the connector then configures the RDMA subsystem to expose the memory for the requested operation and returns an operation control object;
otherwise the connector will select the best available RDMA alternative.
The operation control object, either a [`ReadableOperation`](readable_operation.md) or a [`WritableOperation`](writable_operation.md),
provides NIXL metadata ([RdmaMetadata](rdma_metadata.md)) via its `.metadata()` method, functionality to query the operation's current state, as well as the ability to cancel the operation prior to its completion.
The NIXL metadata must be provided to the remote worker expected to complete the operation.
The metadata contains required information (identifiers, keys, etc.) which enables the remote worker to interact with the provided memory.
> [!Warning]
> NIXL metadata contains a worker's address as well as security keys to access specific registered memory descriptors.
> This data provides direct memory access between workers, and should be considered sensitive and therefore handled accordingly.
## Example Usage
```python
@async_on_start
async def async_init(self):
self.connector = dynamo.nixl_connect.Connector()
```
> [!Tip]
> See [`ReadOperation`](read_operation.md#example-usage), [`ReadableOperation`](readable_operation.md#example-usage),
> [`WritableOperation`](writable_operation.md#example-usage), and [`WriteOperation`](write_operation.md#example-usage)
> for additional examples.
## Methods
### `begin_read`
```python
async def begin_read(
self,
remote_metadata: RdmaMetadata,
local_descriptors: Descriptor | list[Descriptor],
) -> ReadOperation:
```
Creates a [`ReadOperation`](read_operation.md) for transferring data from a remote worker.
To create the operation, the serialized request from a remote worker's [`ReadableOperation`](readable_operation.md)
along with a matching set of local memory descriptors which reference memory intended to receive data from the remote worker
must be provided.
The serialized request must be transferred from the remote to the local worker via a secondary channel, most likely HTTP or TCP+NATS.
Once created, data transfer will begin immediately.
Disposal of the object will instruct the NIXL subsystem to cancel the operation,
therefore the operation should be awaited until completed unless cancellation is intended.
Use [`.wait_for_completion()`](read_operation.md#wait_for_completion) to block the caller until the operation has completed or encountered an error.
### `begin_write`
```python
async def begin_write(
self,
local_descriptors: Descriptor | list[Descriptor],
remote_metadata: RdmaMetadata,
) -> WriteOperation:
```
Creates a [`WriteOperation`](write_operation.md) for transferring data to a remote worker.
To create the operation, the serialized request from a remote worker's [`WritableOperation`](writable_operation.md)
along with a matching set of local memory descriptors which reference memory to be transferred to the remote worker
must be provided.
The serialized request must be transferred from the remote to the local worker via a secondary channel, most likely HTTP or TCP+NATS.
Once created, data transfer will begin immediately.
Disposal of the object will instruct the NIXL subsystem to cancel the operation,
therefore the operation should be awaited until completed unless cancellation is intended.
Use [`.wait_for_completion()`](write_operation.md#wait_for_completion) to block the caller until the operation has completed or encountered an error.
### `create_readable`
```python
async def create_readable(
self,
local_descriptors: Descriptor | list[Descriptor],
) -> ReadableOperation:
```
Creates a [`ReadableOperation`](readable_operation.md) for transferring data to a remote worker.
To create the operation, a set of local memory descriptors must be provided that reference memory intended to be transferred to a remote worker.
Once created, the memory referenced by the provided descriptors becomes immediately readable by a remote worker with the necessary metadata.
The metadata required to access the memory referenced by the provided descriptors is accessible via the operation's `.metadata()` method.
Once acquired, the metadata needs to be provided to a remote worker via a secondary channel, most likely HTTP or TCP+NATS.
Disposal of the object will instruct the NIXL subsystem to cancel the operation,
therefore the operation should be awaited until completed unless cancellation is intended.
Use [`.wait_for_completion()`](readable_operation.md#wait_for_completion) to block the caller until the operation has completed or encountered an error.
### `create_writable`
```python
async def create_writable(
self,
local_descriptors: Descriptor | list[Descriptor],
) -> WritableOperation:
```
Creates a [`WritableOperation`](writable_operation.md) for transferring data from a remote worker.
To create the operation, a set of local memory descriptors must be provided which reference memory intended to receive data from a remote worker.
Once created, the memory referenced by the provided descriptors becomes immediately writable by a remote worker with the necessary metadata.
The metadata required to access the memory referenced by the provided descriptors is accessible via the operation's `.metadata()` method.
Once acquired, the metadata needs to be provided to a remote worker via a secondary channel, most likely HTTP or TCP+NATS.
Disposal of the object will instruct the NIXL subsystem to cancel the operation,
therefore the operation should be awaited until completed unless cancellation is intended.
Use [`.wait_for_completion()`](writable_operation.md#wait_for_completion) to block the caller until the operation has completed or encountered an error.
## Properties
### `hostname`
```python
@property
def hostname(self) -> str:
```
Gets the name of the current worker's host.
### `is_cuda_available`
```python
@cached_property
def is_cuda_available(self) -> bool:
```
Gets `True` when CUDA is available for the selected array module (most likely CuPy); otherwise `False`.
### `name`
```python
@property
def name(self) -> str | None:
```
Gets the Dynamo component name used by the connector.
## Related Classes
- [Descriptor](descriptor.md)
- [Device](device.md)
- [OperationStatus](operation_status.md)
- [RdmaMetadata](rdma_metadata.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.Descriptor
Memory descriptor that ensures memory is registered with the NIXL-based I/O subsystem.
Memory must be registered with the NIXL subsystem to enable interaction with the memory.
Descriptor objects are administrative and do not copy, move, or otherwise modify the registered memory.
There are four ways to create a descriptor:
1. From a `torch.Tensor` object. Device information will be derived from the provided object.
2. From a `tuple` containing either a NumPy or CuPy `ndarray` and information describing where the memory resides (Host/CPU vs GPU).
3. From a Python `bytes` object. Memory is assumed to reside in CPU addressable host memory.
4. From a `tuple` comprised of the address of the memory, its size in bytes, and device information.
An optional reference to a Python object can be provided to avoid garbage collection issues.
## Methods
### `register_memory`
```python
def register_memory(self, connector: Connector) -> None:
```
Instructs the descriptor to register its memory buffer with the NIXL-based I/O subsystem.
Calling this method more than once on the same descriptor has no effect.
When the descriptor is assigned to a NIXL operation, it will be automatically registered if it was not explicitly registered.
## Properties
### `device`
```python
@property
def device(self) -> Device:
```
Gets a reference to the [`Device`](device.md) that contains the buffer the descriptor represents.
### `size`
```python
@property
def size(self) -> int:
```
Gets the size of the memory allocation the descriptor represents.
## Related Classes
- [Connector](connector.md)
- [Device](device.md)
- [OperationStatus](operation_status.md)
- [RdmaMetadata](rdma_metadata.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.Device
`Device` class describes the device a given allocation resides in.
Usually host (`"cpu"`) or GPU (`"cuda"`) memory.
When a system contains multiple GPU devices, specific GPU devices can be identified by including their ordinal index number.
For example, to reference the second GPU in a system, `"cuda:1"` can be used.
By default, when `"cuda"` is provided, it is assumed to be `"cuda:0"` or the first GPU enumerated by the system.
## Properties
### `id`
```python
@property
def id(self) -> int:
```
Gets the identity, or ordinal, of the device.
When the device is the [`HOST`](device_kind.md#host), this value is always `0`.
When the device is a [`GPU`](device_kind.md#cuda), this value identifies a specific GPU.
### `kind`
```python
@property
def kind(self) -> DeviceKind:
```
Gets the [`DeviceKind`](device_kind.md) of the device the instance references.
## Related Classes
- [Connector](connector.md)
- [Descriptor](descriptor.md)
- [OperationStatus](operation_status.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [RdmaMetadata](rdma_metadata.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.DeviceKind(IntEnum)
Represents the kind of device a [`Device`](device.md) object represents.
## Values
### `CUDA`
CUDA addressable device (GPU) memory.
### `HOST`
System (CPU) memory.
## Related Classes
- [Connector](connector.md)
- [Descriptor](descriptor.md)
- [Device](device.md)
- [OperationStatus](operation_status.md)
- [RdmaMetadata](rdma_metadata.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.OperationStatus(IntEnum)
Represents the current state or status of an operation.
## Values
### `CANCELLED`
The operation has been cancelled by the user or system.
### `COMPLETE`
The operation has been completed successfully.
### `ERRORED`
The operation has encountered an error and cannot be completed.
### `IN_PROGRESS`
The operation has been initialized and is in progress (not completed, errored, or cancelled).
### `INITIALIZED`
The operation has been initialized and is ready to be processed.
### `UNINITIALIZED`
The operation has not been initialized yet and is not in a valid state.
## Related Classes
- [Connector](connector.md)
- [Descriptor](descriptor.md)
- [Device](device.md)
- [RdmaMetadata](rdma_metadata.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# dynamo.nixl_connect.RdmaMetadata
A Pydantic type intended to provide JSON serialized NIXL metadata about a [`ReadableOperation`](readable_operation.md) or [`WritableOperation`](writable_operation.md) object.
NIXL metadata contains detailed information about a worker process and how to access memory regions registered with the corresponding agent.
This data is required to perform data transfers using the NIXL-based I/O subsystem.
> [!Warning]
> NIXL metadata contains information to connect corresponding backends across agents, as well as identification keys to access specific registered memory regions.
> This data provides direct memory access between workers, and should be considered sensitive and therefore handled accordingly.
Use the respective class's `.metadata()` method to generate an `RdmaMetadata` object for an operation.
> [!Tip]
> Classes using `RdmaMetadata` objects must be paired correctly.
> [`ReadableOperation`](readable_operation.md) with [`ReadOperation`](read_operation.md), and
> [`WritableOperation`](writable_operation.md) with [`WriteOperation`](write_operation.md).
> Incorrect pairing will result in an error being raised.
## Related Classes
- [Connector](connector.md)
- [Descriptor](descriptor.md)
- [Device](device.md)
- [OperationStatus](operation_status.md)
- [ReadOperation](read_operation.md)
- [ReadableOperation](readable_operation.md)
- [WritableOperation](writable_operation.md)
- [WriteOperation](write_operation.md)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment