Unverified Commit 39d645e5 authored by Jonathan Tong, committed by GitHub
Browse files

docs: migrate Fern docs from fern/ into docs/ (#6206)


Signed-off-by: Jont828 <jt572@cornell.edu>
parent d381e6ff
...@@ -31,9 +31,9 @@ outputs: ...@@ -31,9 +31,9 @@ outputs:
rust: rust:
description: 'Whether rust files changed' description: 'Whether rust files changed'
value: ${{ steps.filter.outputs.rust_any_modified }} value: ${{ steps.filter.outputs.rust_any_modified }}
fern: docs:
description: 'Whether fern docs files changed' description: 'Whether docs files changed'
value: ${{ steps.filter.outputs.fern_any_modified }} value: ${{ steps.filter.outputs.docs_any_modified }}
runs: runs:
using: "composite" using: "composite"
...@@ -89,7 +89,6 @@ runs: ...@@ -89,7 +89,6 @@ runs:
echo "trtllm: ${{ steps.filter.outputs.trtllm_any_modified }}" echo "trtllm: ${{ steps.filter.outputs.trtllm_any_modified }}"
echo "frontend: ${{ steps.filter.outputs.frontend_any_modified }}" echo "frontend: ${{ steps.filter.outputs.frontend_any_modified }}"
echo "rust: ${{ steps.filter.outputs.rust_any_modified }}" echo "rust: ${{ steps.filter.outputs.rust_any_modified }}"
echo "fern: ${{ steps.filter.outputs.fern_any_modified }}"
echo "" echo ""
echo "=== Files Matching Each Filter ===" echo "=== Files Matching Each Filter ==="
echo "docs: ${{ steps.filter.outputs.docs_all_modified_files }}" echo "docs: ${{ steps.filter.outputs.docs_all_modified_files }}"
...@@ -103,13 +102,12 @@ runs: ...@@ -103,13 +102,12 @@ runs:
echo "trtllm: ${{ steps.filter.outputs.trtllm_all_modified_files }}" echo "trtllm: ${{ steps.filter.outputs.trtllm_all_modified_files }}"
echo "frontend: ${{ steps.filter.outputs.frontend_all_modified_files }}" echo "frontend: ${{ steps.filter.outputs.frontend_all_modified_files }}"
echo "rust: ${{ steps.filter.outputs.rust_all_modified_files }}" echo "rust: ${{ steps.filter.outputs.rust_all_modified_files }}"
echo "fern: ${{ steps.filter.outputs.fern_all_modified_files }}"
- name: Check for uncovered files - name: Check for uncovered files
shell: bash shell: bash
run: | run: |
# Combine all filter-specific files into one list # Combine all filter-specific files into one list
COVERED_FILES=$(echo "${{ steps.filter.outputs.docs_all_modified_files }} ${{ steps.filter.outputs.examples_all_modified_files }} ${{ steps.filter.outputs.ignore_all_modified_files }} ${{ steps.filter.outputs.ci_all_modified_files }} ${{ steps.filter.outputs.core_all_modified_files }} ${{ steps.filter.outputs.operator_all_modified_files }} ${{ steps.filter.outputs.deploy_all_modified_files }} ${{ steps.filter.outputs.planner_all_modified_files }} ${{ steps.filter.outputs.vllm_all_modified_files }} ${{ steps.filter.outputs.sglang_all_modified_files }} ${{ steps.filter.outputs.trtllm_all_modified_files }} ${{ steps.filter.outputs.frontend_all_modified_files }} ${{ steps.filter.outputs.rust_all_modified_files }} ${{ steps.filter.outputs.fern_all_modified_files }}" | tr ' ' '\n' | grep -v '^$' | sort -u) COVERED_FILES=$(echo "${{ steps.filter.outputs.docs_all_modified_files }} ${{ steps.filter.outputs.examples_all_modified_files }} ${{ steps.filter.outputs.ignore_all_modified_files }} ${{ steps.filter.outputs.ci_all_modified_files }} ${{ steps.filter.outputs.core_all_modified_files }} ${{ steps.filter.outputs.operator_all_modified_files }} ${{ steps.filter.outputs.deploy_all_modified_files }} ${{ steps.filter.outputs.planner_all_modified_files }} ${{ steps.filter.outputs.vllm_all_modified_files }} ${{ steps.filter.outputs.sglang_all_modified_files }} ${{ steps.filter.outputs.trtllm_all_modified_files }} ${{ steps.filter.outputs.frontend_all_modified_files }} ${{ steps.filter.outputs.rust_all_modified_files }}" | tr ' ' '\n' | grep -v '^$' | sort -u)
# Get all modified files # Get all modified files
ALL_FILES=$(echo "${{ steps.filter.outputs.all_all_modified_files }}" | tr ' ' '\n' | grep -v '^$' | sort -u) ALL_FILES=$(echo "${{ steps.filter.outputs.all_all_modified_files }}" | tr ' ' '\n' | grep -v '^$' | sort -u)
......
...@@ -8,18 +8,18 @@ ...@@ -8,18 +8,18 @@
# sglang -> sglang build and test # sglang -> sglang build and test
# trtllm -> trtllm build and test # trtllm -> trtllm build and test
# frontend -> frontend EPP image build # frontend -> frontend EPP image build
# fern -> fern docs lint, sync, and version release # docs -> fern docs lint, sync, and version release (docs/ directory)
# #
# Filters for coverage only (no CI triggered): # Filters for coverage only (no CI triggered):
# docs, examples, ignore, planner # examples, ignore, planner
all: all:
- '**' - '**'
docs: docs:
- 'docs/**' - 'docs/**'
- 'fern/**'
- '**/*.md' - '**/*.md'
- '**/*.rst'
- '**/*.txt' - '**/*.txt'
- '**/.gitignore' - '**/.gitignore'
- '**/.helmignore' - '**/.helmignore'
...@@ -27,9 +27,6 @@ docs: ...@@ -27,9 +27,6 @@ docs:
- 'LICENSE' - 'LICENSE'
- 'CODEOWNERS' - 'CODEOWNERS'
fern:
- 'fern/**'
examples: examples:
- 'recipes/**' - 'recipes/**'
- 'examples/**' - 'examples/**'
...@@ -101,7 +98,7 @@ operator: ...@@ -101,7 +98,7 @@ operator:
- *ci - *ci
- 'deploy/operator/**' - 'deploy/operator/**'
- 'deploy/operator/.*' - 'deploy/operator/.*'
- 'docs/kubernetes/api_reference.md' - 'docs/pages/kubernetes/api-reference.md'
deploy: deploy:
- '!**/*.md' - '!**/*.md'
......
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
# This workflow handles all Fern documentation automation: # This workflow handles all Fern documentation automation:
# #
# 1. LINT (PRs): Validates Fern configuration and checks for broken links # 1. LINT (PRs): Validates Fern configuration and checks for broken links
# - Triggers on pull requests when fern/** files change # - Triggers on pull requests when docs/** files change
# - Runs `fern check` and `fern docs broken-links` # - Runs `fern check` and `fern docs broken-links`
# #
# 2. SYNC vNEXT (push to main): Syncs fern/ from main to docs-website branch # 2. SYNC vNEXT (push to main): Syncs docs/ from main to docs-website branch
# - Triggers on push to main when fern/** files change # - Triggers on push to main when docs/** files change
# - Preserves versioned documentation snapshots on docs-website branch # - Preserves versioned documentation snapshots on docs-website branch
# - Publishes docs to Fern after syncing # - Publishes docs to Fern after syncing
# #
...@@ -63,7 +63,7 @@ jobs: ...@@ -63,7 +63,7 @@ jobs:
# Skip for tag pushes - version release doesn't need changed-files check # Skip for tag pushes - version release doesn't need changed-files check
if: github.ref_type != 'tag' if: github.ref_type != 'tag'
outputs: outputs:
fern: ${{ steps.changes.outputs.fern }} docs: ${{ steps.changes.outputs.docs }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
...@@ -76,7 +76,7 @@ jobs: ...@@ -76,7 +76,7 @@ jobs:
gh_token: ${{ github.token }} gh_token: ${{ github.token }}
############################################################################# #############################################################################
# LINT JOBS - Run on PRs when fern/** files change # LINT JOBS - Run on PRs when docs/** files change
############################################################################# #############################################################################
fern-check: fern-check:
...@@ -84,7 +84,7 @@ jobs: ...@@ -84,7 +84,7 @@ jobs:
needs: changed-files needs: changed-files
if: | if: |
github.ref_type != 'tag' && github.ref_type != 'tag' &&
needs.changed-files.outputs.fern == 'true' && needs.changed-files.outputs.docs == 'true' &&
(github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/')) (github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/'))
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
...@@ -100,7 +100,7 @@ jobs: ...@@ -100,7 +100,7 @@ jobs:
run: npm install -g fern-api run: npm install -g fern-api
- name: Validate Fern configuration - name: Validate Fern configuration
working-directory: fern working-directory: docs
run: fern check run: fern check
fern-broken-links: fern-broken-links:
...@@ -108,7 +108,7 @@ jobs: ...@@ -108,7 +108,7 @@ jobs:
needs: changed-files needs: changed-files
if: | if: |
github.ref_type != 'tag' && github.ref_type != 'tag' &&
needs.changed-files.outputs.fern == 'true' && needs.changed-files.outputs.docs == 'true' &&
(github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/')) (github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/'))
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
...@@ -124,11 +124,11 @@ jobs: ...@@ -124,11 +124,11 @@ jobs:
run: npm install -g fern-api run: npm install -g fern-api
- name: Check for broken links - name: Check for broken links
working-directory: fern working-directory: docs
run: fern docs broken-links run: fern docs broken-links
############################################################################# #############################################################################
# SYNC vNEXT - Run on push to main when fern/** files change # SYNC vNEXT - Run on push to main when docs/** files change
############################################################################# #############################################################################
sync-vnext: sync-vnext:
...@@ -136,7 +136,7 @@ jobs: ...@@ -136,7 +136,7 @@ jobs:
needs: changed-files needs: changed-files
if: | if: |
github.ref == 'refs/heads/main' && github.ref == 'refs/heads/main' &&
(needs.changed-files.outputs.fern == 'true' || github.event_name == 'workflow_dispatch') && (needs.changed-files.outputs.docs == 'true' || github.event_name == 'workflow_dispatch') &&
(github.event.inputs.tag == '' || github.event.inputs.tag == null) (github.event.inputs.tag == '' || github.event.inputs.tag == null)
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
...@@ -151,13 +151,13 @@ jobs: ...@@ -151,13 +151,13 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
ref: docs-website ref: docs-website
path: fern-checkout path: docs-checkout
fetch-depth: 1 fetch-depth: 1
token: ${{ secrets.GITHUB_TOKEN }} token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Git - name: Setup Git
run: | run: |
cd fern-checkout cd docs-checkout
git config user.name "github-actions[bot]" git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com" git config user.email "github-actions[bot]@users.noreply.github.com"
...@@ -165,41 +165,41 @@ jobs: ...@@ -165,41 +165,41 @@ jobs:
run: | run: |
# Sync pages/ directory (vNext content) # Sync pages/ directory (vNext content)
echo "Syncing pages/ from main to docs-website branch..." echo "Syncing pages/ from main to docs-website branch..."
rm -rf fern-checkout/fern/pages rm -rf docs-checkout/docs/pages
cp -r main-checkout/fern/pages fern-checkout/fern/pages cp -r main-checkout/docs/pages docs-checkout/docs/pages
# Sync versions/next.yml (vNext navigation) # Sync versions/next.yml (vNext navigation)
echo "Syncing versions/next.yml from main to docs-website branch..." echo "Syncing versions/next.yml from main to docs-website branch..."
cp main-checkout/fern/versions/next.yml fern-checkout/fern/versions/next.yml cp main-checkout/docs/versions/next.yml docs-checkout/docs/versions/next.yml
# Sync assets/ directory # Sync assets/ directory
echo "Syncing assets/ from main to docs-website branch..." echo "Syncing assets/ from main to docs-website branch..."
rm -rf fern-checkout/fern/assets rm -rf docs-checkout/docs/assets
cp -r main-checkout/fern/assets fern-checkout/fern/assets cp -r main-checkout/docs/assets docs-checkout/docs/assets
# Sync fern.config.json # Sync fern.config.json
echo "Syncing fern.config.json from main to docs-website branch..." echo "Syncing fern.config.json from main to docs-website branch..."
cp main-checkout/fern/fern.config.json fern-checkout/fern/fern.config.json cp main-checkout/docs/fern.config.json docs-checkout/docs/fern.config.json
# Sync .gitignore if it exists # Sync .gitignore if it exists
if [ -f main-checkout/fern/.gitignore ]; then if [ -f main-checkout/docs/.gitignore ]; then
cp main-checkout/fern/.gitignore fern-checkout/fern/.gitignore cp main-checkout/docs/.gitignore docs-checkout/docs/.gitignore
fi fi
# Sync convert_callouts.py script # Sync convert_callouts.py script
if [ -f main-checkout/fern/convert_callouts.py ]; then if [ -f main-checkout/docs/convert_callouts.py ]; then
cp main-checkout/fern/convert_callouts.py fern-checkout/fern/convert_callouts.py cp main-checkout/docs/convert_callouts.py docs-checkout/docs/convert_callouts.py
fi fi
- name: Convert GitHub callouts to Fern format - name: Convert GitHub callouts to Fern format
run: | run: |
echo "Converting GitHub-style callouts to Fern format in pages/..." echo "Converting GitHub-style callouts to Fern format in pages/..."
python3 fern-checkout/fern/convert_callouts.py --dir fern-checkout/fern/pages python3 docs-checkout/docs/convert_callouts.py --dir docs-checkout/docs/pages
echo "Callout conversion complete." echo "Callout conversion complete."
- name: Update docs.yml preserving versions - name: Update docs.yml preserving versions
run: | run: |
cd fern-checkout/fern cd docs-checkout/docs
# Extract the list of versioned entries from current docs.yml (on docs-website branch) # Extract the list of versioned entries from current docs.yml (on docs-website branch)
# These are entries after "path: ./versions/next.yml" # These are entries after "path: ./versions/next.yml"
...@@ -209,7 +209,7 @@ jobs: ...@@ -209,7 +209,7 @@ jobs:
VERSION_ENTRIES=$(awk '/- display-name: v/{found=1} found{print; if(/path:/) found=0}' docs.yml) VERSION_ENTRIES=$(awk '/- display-name: v/{found=1} found{print; if(/path:/) found=0}' docs.yml)
# Copy docs.yml from main as base # Copy docs.yml from main as base
cp ../../main-checkout/fern/docs.yml docs.yml cp ../../main-checkout/docs/docs.yml docs.yml
# If we had version entries, append them after the next.yml line # If we had version entries, append them after the next.yml line
if [ -n "$VERSION_ENTRIES" ]; then if [ -n "$VERSION_ENTRIES" ]; then
...@@ -229,7 +229,7 @@ jobs: ...@@ -229,7 +229,7 @@ jobs:
- name: Check for changes - name: Check for changes
id: changes id: changes
run: | run: |
cd fern-checkout cd docs-checkout
if git diff --quiet && git diff --cached --quiet; then if git diff --quiet && git diff --cached --quiet; then
echo "has_changes=false" >> $GITHUB_OUTPUT echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No changes detected" echo "No changes detected"
...@@ -242,12 +242,12 @@ jobs: ...@@ -242,12 +242,12 @@ jobs:
- name: Commit and push changes - name: Commit and push changes
if: steps.changes.outputs.has_changes == 'true' if: steps.changes.outputs.has_changes == 'true'
run: | run: |
cd fern-checkout cd docs-checkout
git add -A git add -A
git commit -m "docs(fern): sync vNext from main git commit -m "docs(fern): sync vNext from main
Automated sync of fern/ directory from main branch. Automated sync of docs/ directory from main branch.
Preserves versioned documentation snapshots. Preserves versioned documentation snapshots.
Source commit: ${{ github.sha }}" Source commit: ${{ github.sha }}"
...@@ -270,7 +270,7 @@ jobs: ...@@ -270,7 +270,7 @@ jobs:
if: steps.changes.outputs.has_changes == 'true' if: steps.changes.outputs.has_changes == 'true'
env: env:
FERN_TOKEN: ${{ secrets.FERN_TOKEN }} FERN_TOKEN: ${{ secrets.FERN_TOKEN }}
working-directory: fern-checkout/fern working-directory: docs-checkout/docs
run: fern generate --docs run: fern generate --docs
############################################################################# #############################################################################
...@@ -317,13 +317,13 @@ jobs: ...@@ -317,13 +317,13 @@ jobs:
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
if [ -d "fern/pages-$TAG" ]; then if [ -d "docs/pages-$TAG" ]; then
echo "::error::Version $TAG already exists (fern/pages-$TAG directory found)" echo "::error::Version $TAG already exists (docs/pages-$TAG directory found)"
exit 1 exit 1
fi fi
if [ -f "fern/versions/$TAG.yml" ]; then if [ -f "docs/versions/$TAG.yml" ]; then
echo "::error::Version $TAG already exists (fern/versions/$TAG.yml found)" echo "::error::Version $TAG already exists (docs/versions/$TAG.yml found)"
exit 1 exit 1
fi fi
...@@ -338,22 +338,22 @@ jobs: ...@@ -338,22 +338,22 @@ jobs:
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
echo "Creating fern/pages-$TAG/ from fern/pages/..." echo "Creating docs/pages-$TAG/ from docs/pages/..."
# Copy current pages/ to pages-vX.Y.Z/ # Copy current pages/ to pages-vX.Y.Z/
cp -r fern/pages "fern/pages-$TAG" cp -r docs/pages "docs/pages-$TAG"
echo "Created fern/pages-$TAG/" echo "Created docs/pages-$TAG/"
ls -la "fern/pages-$TAG/" | head -20 ls -la "docs/pages-$TAG/" | head -20
- name: Update GitHub links to 'main' to version tag - name: Update GitHub links to 'main' to version tag
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
echo "Updating GitHub links from 'tree/main' to 'tree/$TAG' in fern/pages-$TAG/..." echo "Updating GitHub links from 'tree/main' to 'tree/$TAG' in docs/pages-$TAG/..."
# Find all markdown files and replace tree/main with tree/vX.Y.Z # Find all markdown files and replace tree/main with tree/vX.Y.Z
find "fern/pages-$TAG" -name "*.md" -o -name "*.mdx" | while read file; do find "docs/pages-$TAG" -name "*.md" -o -name "*.mdx" | while read file; do
if grep -q "github.com/ai-dynamo/dynamo/tree/main" "$file"; then if grep -q "github.com/ai-dynamo/dynamo/tree/main" "$file"; then
echo "Updating: $file" echo "Updating: $file"
sed -i "s|github.com/ai-dynamo/dynamo/tree/main|github.com/ai-dynamo/dynamo/tree/$TAG|g" "$file" sed -i "s|github.com/ai-dynamo/dynamo/tree/main|github.com/ai-dynamo/dynamo/tree/$TAG|g" "$file"
...@@ -361,7 +361,7 @@ jobs: ...@@ -361,7 +361,7 @@ jobs:
done done
# Also update blob/main references (for direct file links) # Also update blob/main references (for direct file links)
find "fern/pages-$TAG" -name "*.md" -o -name "*.mdx" | while read file; do find "docs/pages-$TAG" -name "*.md" -o -name "*.mdx" | while read file; do
if grep -q "github.com/ai-dynamo/dynamo/blob/main" "$file"; then if grep -q "github.com/ai-dynamo/dynamo/blob/main" "$file"; then
echo "Updating blob links: $file" echo "Updating blob links: $file"
sed -i "s|github.com/ai-dynamo/dynamo/blob/main|github.com/ai-dynamo/dynamo/blob/$TAG|g" "$file" sed -i "s|github.com/ai-dynamo/dynamo/blob/main|github.com/ai-dynamo/dynamo/blob/$TAG|g" "$file"
...@@ -375,19 +375,19 @@ jobs: ...@@ -375,19 +375,19 @@ jobs:
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
echo "Converting GitHub-style callouts to Fern format in pages-$TAG/..." echo "Converting GitHub-style callouts to Fern format in pages-$TAG/..."
python3 fern/convert_callouts.py --dir "fern/pages-$TAG" python3 docs/convert_callouts.py --dir "docs/pages-$TAG"
echo "Callout conversion complete." echo "Callout conversion complete."
- name: Create version config file - name: Create version config file
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
VERSION="${{ steps.version.outputs.version }}" VERSION="${{ steps.version.outputs.version }}"
VERSION_FILE="fern/versions/$TAG.yml" VERSION_FILE="docs/versions/$TAG.yml"
echo "Creating version config: $VERSION_FILE" echo "Creating version config: $VERSION_FILE"
# Copy next.yml as template # Copy next.yml as template
cp fern/versions/next.yml "$VERSION_FILE" cp docs/versions/next.yml "$VERSION_FILE"
# Update the comment at the top # Update the comment at the top
sed -i "s/# Navigation structure for Latest version/# Navigation structure for $TAG version/" "$VERSION_FILE" sed -i "s/# Navigation structure for Latest version/# Navigation structure for $TAG version/" "$VERSION_FILE"
...@@ -403,7 +403,7 @@ jobs: ...@@ -403,7 +403,7 @@ jobs:
- name: Update docs.yml with new version - name: Update docs.yml with new version
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
DOCS_FILE="fern/docs.yml" DOCS_FILE="docs/docs.yml"
echo "Updating $DOCS_FILE to include $TAG..." echo "Updating $DOCS_FILE to include $TAG..."
...@@ -433,15 +433,15 @@ jobs: ...@@ -433,15 +433,15 @@ jobs:
run: | run: |
TAG="${{ steps.version.outputs.tag }}" TAG="${{ steps.version.outputs.tag }}"
git add "fern/pages-$TAG/" git add "docs/pages-$TAG/"
git add "fern/versions/$TAG.yml" git add "docs/versions/$TAG.yml"
git add fern/docs.yml git add docs/docs.yml
git commit -m "docs(fern): release version $TAG git commit -m "docs(fern): release version $TAG
- Created fern/pages-$TAG/ with documentation snapshot - Created docs/pages-$TAG/ with documentation snapshot
- Created fern/versions/$TAG.yml version navigation config - Created docs/versions/$TAG.yml version navigation config
- Updated fern/docs.yml to include $TAG in version list - Updated docs/docs.yml to include $TAG in version list
Automated by fern-docs workflow Automated by fern-docs workflow
Source tag: $TAG" Source tag: $TAG"
...@@ -461,5 +461,5 @@ jobs: ...@@ -461,5 +461,5 @@ jobs:
- name: Publish Docs - name: Publish Docs
env: env:
FERN_TOKEN: ${{ secrets.FERN_TOKEN }} FERN_TOKEN: ${{ secrets.FERN_TOKEN }}
working-directory: ./fern working-directory: ./docs
run: fern generate --docs run: fern generate --docs
This diff is collapsed.
...@@ -129,9 +129,9 @@ Issues labeled `good-first-issue` are sized for new contributors. We provide ext ...@@ -129,9 +129,9 @@ Issues labeled `good-first-issue` are sized for new contributors. We provide ext
<!-- We were given the feedback that having information on architecture, languages used, etc. would be helpful for external contributors --> <!-- We were given the feedback that having information on architecture, languages used, etc. would be helpful for external contributors -->
Understanding Dynamo's architecture helps you find where to make changes. For the complete picture, see the [Architecture Documentation](docs/design_docs/architecture.md) and [Support Matrix](docs/reference/support-matrix.md). Understanding Dynamo's architecture helps you find where to make changes. For the complete picture, see the [Architecture Documentation](docs/pages/design-docs/architecture.md) and [Support Matrix](docs/pages/reference/support-matrix.md).
![Dynamo Architecture](docs/images/architecture.png) ![Dynamo Architecture](docs/assets/img/architecture.png)
### Core Components ### Core Components
...@@ -148,9 +148,9 @@ Understanding Dynamo's architecture helps you find where to make changes. For th ...@@ -148,9 +148,9 @@ Understanding Dynamo's architecture helps you find where to make changes. For th
| Plane | Purpose | Documentation | | Plane | Purpose | Documentation |
|-------|---------|---------------| |-------|---------|---------------|
| **Discovery Plane** | Service registration and discovery across components | [docs/design_docs/distributed_runtime.md](docs/design_docs/distributed_runtime.md) | | **Discovery Plane** | Service registration and discovery across components | [docs/pages/design-docs/distributed-runtime.md](docs/pages/design-docs/distributed-runtime.md) |
| **Request Plane** | High-performance request routing between components | [docs/design_docs/request_plane.md](docs/design_docs/request_plane.md) | | **Request Plane** | High-performance request routing between components | [docs/pages/design-docs/request-plane.md](docs/pages/design-docs/request-plane.md) |
| **KV Event Plane** | KV cache event propagation for cache-aware routing | [docs/design_docs/event_plane.md](docs/design_docs/event_plane.md) | | **KV Event Plane** | KV cache event propagation for cache-aware routing | [docs/pages/design-docs/event-plane.md](docs/pages/design-docs/event-plane.md) |
### Kubernetes Deployment ### Kubernetes Deployment
......
...@@ -15,14 +15,14 @@ See the License for the specific language governing permissions and ...@@ -15,14 +15,14 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
--> -->
![Dynamo banner](./docs/images/frontpage-banner.png) ![Dynamo banner](./docs/assets/img/frontpage-banner.png)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![GitHub Release](https://img.shields.io/github/v/release/ai-dynamo/dynamo)](https://github.com/ai-dynamo/dynamo/releases/latest) [![GitHub Release](https://img.shields.io/github/v/release/ai-dynamo/dynamo)](https://github.com/ai-dynamo/dynamo/releases/latest)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/ai-dynamo/dynamo) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/ai-dynamo/dynamo)
[![Discord](https://dcbadge.limes.pink/api/server/D92uqZRjCZ?style=flat)](https://discord.gg/D92uqZRjCZ) ![Community Contributors](https://img.shields.io/badge/community_contributors-70%2B-brightgreen) [![Discord](https://dcbadge.limes.pink/api/server/D92uqZRjCZ?style=flat)](https://discord.gg/D92uqZRjCZ) ![Community Contributors](https://img.shields.io/badge/community_contributors-70%2B-brightgreen)
| **[Roadmap](https://github.com/ai-dynamo/dynamo/issues/5506)** | **[Support Matrix](https://github.com/ai-dynamo/dynamo/blob/main/docs/reference/support-matrix.md)** | **[Docs](https://docs.nvidia.com/dynamo/latest/index.html)** | **[Recipes](https://github.com/ai-dynamo/dynamo/tree/main/recipes)** | **[Examples](https://github.com/ai-dynamo/dynamo/tree/main/examples)** | **[Prebuilt Containers](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-dynamo/collections/ai-dynamo)** | **[Design Proposals](https://github.com/ai-dynamo/enhancements)** | **[Blogs](https://developer.nvidia.com/blog/tag/nvidia-dynamo)** | **[Roadmap](https://github.com/ai-dynamo/dynamo/issues/5506)** | **[Support Matrix](https://github.com/ai-dynamo/dynamo/blob/main/docs/pages/reference/support-matrix.md)** | **[Docs](https://docs.nvidia.com/dynamo/latest/index.html)** | **[Recipes](https://github.com/ai-dynamo/dynamo/tree/main/recipes)** | **[Examples](https://github.com/ai-dynamo/dynamo/tree/main/examples)** | **[Prebuilt Containers](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-dynamo/collections/ai-dynamo)** | **[Design Proposals](https://github.com/ai-dynamo/enhancements)** | **[Blogs](https://developer.nvidia.com/blog/tag/nvidia-dynamo)**
# NVIDIA Dynamo # NVIDIA Dynamo
...@@ -31,7 +31,7 @@ High-throughput, low-latency inference framework designed for serving generative ...@@ -31,7 +31,7 @@ High-throughput, low-latency inference framework designed for serving generative
## Why Dynamo ## Why Dynamo
<p align="center"> <p align="center">
<img src="./docs/images/frontpage-gpu-vertical.png" alt="Multi Node Multi-GPU topology" width="600" /> <img src="./docs/assets/img/frontpage-gpu-vertical.png" alt="Multi Node Multi-GPU topology" width="600" />
</p> </p>
Large language models exceed single-GPU capacity. Tensor parallelism spreads layers across GPUs but creates coordination challenges. Dynamo closes this orchestration gap. Large language models exceed single-GPU capacity. Tensor parallelism spreads layers across GPUs but creates coordination challenges. Dynamo closes this orchestration gap.
...@@ -48,25 +48,25 @@ Built in Rust for performance and Python for extensibility, Dynamo is fully open ...@@ -48,25 +48,25 @@ Built in Rust for performance and Python for extensibility, Dynamo is fully open
## Backend Feature Support ## Backend Feature Support
| | [SGLang](docs/backends/sglang/README.md) | [TensorRT-LLM](docs/backends/trtllm/README.md) | [vLLM](docs/backends/vllm/README.md) | | | [SGLang](docs/pages/backends/sglang/README.md) | [TensorRT-LLM](docs/pages/backends/trtllm/README.md) | [vLLM](docs/pages/backends/vllm/README.md) |
|---|:----:|:----------:|:--:| |---|:----:|:----------:|:--:|
| **Best For** | High-throughput serving | Maximum performance | Broadest feature coverage | | **Best For** | High-throughput serving | Maximum performance | Broadest feature coverage |
| [**Disaggregated Serving**](docs/design_docs/disagg_serving.md) | ✅ | ✅ | ✅ | | [**Disaggregated Serving**](docs/pages/design-docs/disagg-serving.md) | ✅ | ✅ | ✅ |
| [**KV-Aware Routing**](docs/components/router/README.md) | ✅ | ✅ | ✅ | | [**KV-Aware Routing**](docs/pages/components/router/README.md) | ✅ | ✅ | ✅ |
| [**SLA-Based Planner**](docs/components/planner/planner_guide.md) | ✅ | ✅ | ✅ | | [**SLA-Based Planner**](docs/pages/components/planner/planner-guide.md) | ✅ | ✅ | ✅ |
| [**KVBM**](docs/components/kvbm/README.md) | 🚧 | ✅ | ✅ | | [**KVBM**](docs/pages/components/kvbm/README.md) | 🚧 | ✅ | ✅ |
| [**Multimodal**](docs/features/multimodal/README.md) | ✅ | ✅ | ✅ | | [**Multimodal**](docs/pages/features/multimodal/README.md) | ✅ | ✅ | ✅ |
| [**Tool Calling**](docs/agents/tool-calling.md) | ✅ | ✅ | ✅ | | [**Tool Calling**](docs/pages/agents/tool-calling.md) | ✅ | ✅ | ✅ |
> **[Full Feature Matrix →](docs/reference/feature-matrix.md)** — Detailed compatibility including LoRA, Request Migration, Speculative Decoding, and feature interactions. > **[Full Feature Matrix →](docs/pages/reference/feature-matrix.md)** — Detailed compatibility including LoRA, Request Migration, Speculative Decoding, and feature interactions.
## Dynamo Architecture ## Dynamo Architecture
<p align="center"> <p align="center">
<img src="./docs/images/frontpage-architecture.png" alt="Dynamo architecture" width="600" /> <img src="./docs/assets/img/frontpage-architecture.png" alt="Dynamo architecture" width="600" />
</p> </p>
> **[Architecture Deep Dive →](docs/design_docs/architecture.md)** > **[Architecture Deep Dive →](docs/pages/design-docs/architecture.md)**
## Latest News ## Latest News
...@@ -87,7 +87,7 @@ Want to help shape the future of distributed LLM inference? See the **[Contribut ...@@ -87,7 +87,7 @@ Want to help shape the future of distributed LLM inference? See the **[Contribut
# Local Quick Start # Local Quick Start
The following examples require a few system level packages. The following examples require a few system level packages.
Recommended to use Ubuntu 24.04 with a x86_64 CPU. See [docs/reference/support-matrix.md](docs/reference/support-matrix.md) Recommended to use Ubuntu 24.04 with a x86_64 CPU. See [docs/pages/reference/support-matrix.md](docs/pages/reference/support-matrix.md)
## Install Dynamo ## Install Dynamo
...@@ -108,7 +108,7 @@ docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/vllm-runt ...@@ -108,7 +108,7 @@ docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/vllm-runt
> **Tip:** To run frontend and worker in the same container, either run processes in background with `&` (see below), or open a second terminal and use `docker exec -it <container_id> bash`. > **Tip:** To run frontend and worker in the same container, either run processes in background with `&` (see below), or open a second terminal and use `docker exec -it <container_id> bash`.
See [Release Artifacts](docs/reference/release-artifacts.md#container-images) for available versions. See [Release Artifacts](docs/pages/reference/release-artifacts.md#container-images) for available versions.
### Option B: Install from PyPI ### Option B: Install from PyPI
...@@ -143,7 +143,7 @@ pip install torch==2.9.0 torchvision --index-url https://download.pytorch.org/wh ...@@ -143,7 +143,7 @@ pip install torch==2.9.0 torchvision --index-url https://download.pytorch.org/wh
pip install --pre --extra-index-url https://pypi.nvidia.com "ai-dynamo[trtllm]" pip install --pre --extra-index-url https://pypi.nvidia.com "ai-dynamo[trtllm]"
``` ```
> **Note:** TensorRT-LLM requires `pip` due to a transitive Git URL dependency that `uv` doesn't resolve. We recommend using the [TensorRT-LLM container](docs/reference/release-artifacts.md#container-images) for broader compatibility. > **Note:** TensorRT-LLM requires `pip` due to a transitive Git URL dependency that `uv` doesn't resolve. We recommend using the [TensorRT-LLM container](docs/pages/reference/release-artifacts.md#container-images) for broader compatibility.
**vLLM** **vLLM**
...@@ -220,7 +220,7 @@ For production deployments on Kubernetes clusters with multiple GPUs. ...@@ -220,7 +220,7 @@ For production deployments on Kubernetes clusters with multiple GPUs.
## Prerequisites ## Prerequisites
- Kubernetes cluster with GPU nodes - Kubernetes cluster with GPU nodes
- [Dynamo Platform installed](docs/kubernetes/README.md) - [Dynamo Platform installed](docs/pages/kubernetes/README.md)
- HuggingFace token for model downloads - HuggingFace token for model downloads
## Production Recipes ## Production Recipes
...@@ -346,8 +346,8 @@ python3 -m dynamo.frontend ...@@ -346,8 +346,8 @@ python3 -m dynamo.frontend
Dynamo provides comprehensive benchmarking tools: Dynamo provides comprehensive benchmarking tools:
- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies using AIPerf - **[Benchmarking Guide](docs/pages/benchmarks/benchmarking.md)** – Compare deployment topologies using AIPerf
- **[SLA-Driven Deployments](docs/components/planner/planner_guide.md)** – Optimize deployments to meet SLA requirements - **[SLA-Driven Deployments](docs/pages/components/planner/planner-guide.md)** – Optimize deployments to meet SLA requirements
## Frontend OpenAPI Specification ## Frontend OpenAPI Specification
...@@ -357,11 +357,11 @@ The OpenAI-compatible frontend exposes an OpenAPI 3 spec at `/openapi.json`. To ...@@ -357,11 +357,11 @@ The OpenAI-compatible frontend exposes an OpenAPI 3 spec at `/openapi.json`. To
cargo run -p dynamo-llm --bin generate-frontend-openapi cargo run -p dynamo-llm --bin generate-frontend-openapi
``` ```
This writes to `docs/reference/api/openapi.json`. This writes to `docs/pages/reference/api/openapi.json`.
## Service Discovery and Messaging ## Service Discovery and Messaging
Dynamo uses TCP for inter-component communication. On Kubernetes, native resources ([CRDs + EndpointSlices](docs/kubernetes/service_discovery.md)) handle service discovery. External services are optional for most deployments: Dynamo uses TCP for inter-component communication. On Kubernetes, native resources ([CRDs + EndpointSlices](docs/pages/kubernetes/service-discovery.md)) handle service discovery. External services are optional for most deployments:
| Deployment | etcd | NATS | Notes | | Deployment | etcd | NATS | Notes |
|------------|------|------|-------| |------------|------|------|-------|
...@@ -387,11 +387,11 @@ See [SGLang on Slurm](examples/backends/sglang/slurm_jobs/README.md) and [TRT-LL ...@@ -387,11 +387,11 @@ See [SGLang on Slurm](examples/backends/sglang/slurm_jobs/README.md) and [TRT-LL
- [10/16] [How Baseten achieved 2x faster inference with NVIDIA Dynamo](https://www.baseten.co/blog/how-baseten-achieved-2x-faster-inference-with-nvidia-dynamo/) - [10/16] [How Baseten achieved 2x faster inference with NVIDIA Dynamo](https://www.baseten.co/blog/how-baseten-achieved-2x-faster-inference-with-nvidia-dynamo/)
<!-- Reference links for Feature Compatibility Matrix --> <!-- Reference links for Feature Compatibility Matrix -->
[disagg]: docs/design_docs/disagg_serving.md [disagg]: docs/pages/design-docs/disagg-serving.md
[kv-routing]: docs/components/router/README.md [kv-routing]: docs/pages/components/router/README.md
[planner]: docs/components/planner/planner_guide.md [planner]: docs/pages/components/planner/planner-guide.md
[kvbm]: docs/components/kvbm/README.md [kvbm]: docs/pages/components/kvbm/README.md
[mm]: examples/multimodal/ [mm]: examples/multimodal/
[migration]: docs/fault_tolerance/request_migration.md [migration]: docs/pages/fault-tolerance/request-migration.md
[lora]: examples/backends/vllm/deploy/lora/README.md [lora]: examples/backends/vllm/deploy/lora/README.md
[tools]: docs/agents/tool-calling.md [tools]: docs/pages/agents/tool-calling.md
...@@ -20,7 +20,7 @@ This directory contains benchmarking scripts and tools for performance evaluatio ...@@ -20,7 +20,7 @@ This directory contains benchmarking scripts and tools for performance evaluatio
## Quick Start ## Quick Start
### Benchmark a Dynamo Deployment ### Benchmark a Dynamo Deployment
First, deploy your DynamoGraphDeployment using the [deployment documentation](../docs/kubernetes/), then: First, deploy your DynamoGraphDeployment using the [deployment documentation](../docs/pages/kubernetes/), then:
```bash ```bash
# Port-forward your deployment to http://localhost:8000 # Port-forward your deployment to http://localhost:8000
...@@ -71,4 +71,4 @@ Detailed information is provided in the `prefix_data_generator` directory. ...@@ -71,4 +71,4 @@ Detailed information is provided in the `prefix_data_generator` directory.
## Comprehensive Guide ## Comprehensive Guide
For detailed documentation, configuration options, and advanced usage, see the [complete benchmarking guide](../docs/benchmarks/benchmarking.md). For detailed documentation, configuration options, and advanced usage, see the [complete benchmarking guide](../docs/pages/benchmarks/benchmarking.md).
../../docs/benchmarks/benchmarking.md
\ No newline at end of file
...@@ -6,8 +6,8 @@ SPDX-License-Identifier: Apache-2.0 ...@@ -6,8 +6,8 @@ SPDX-License-Identifier: Apache-2.0
# Profiler # Profiler
Documentation for the Dynamo Profiler has moved to [docs/components/profiler/](../../docs/components/profiler/README.md). Documentation for the Dynamo Profiler has moved to [docs/pages/components/profiler/](../../docs/pages/components/profiler/README.md).
- [Profiler Overview](../../docs/components/profiler/README.md) - [Profiler Overview](../../docs/pages/components/profiler/README.md)
- [Profiler Guide](../../docs/components/profiler/profiler_guide.md) - [Profiler Guide](../../docs/pages/components/profiler/profiler-guide.md)
- [Profiler Examples](../../docs/components/profiler/profiler_examples.md) - [Profiler Examples](../../docs/pages/components/profiler/profiler-examples.md)
...@@ -620,7 +620,7 @@ def create_gradio_interface( ...@@ -620,7 +620,7 @@ def create_gradio_interface(
> 📝 **Note:** The dotted red line in the prefill and decode charts are default TTFT and ITL SLAs if not specified. > 📝 **Note:** The dotted red line in the prefill and decode charts are default TTFT and ITL SLAs if not specified.
> ⚠️ **Warning:** The TTFT values here represent the ideal case when requests arrive uniformly, minimizing queueing. Real-world TTFT may be higher than profiling results. To mitigate the issue, planner uses [correction factors](https://github.com/ai-dynamo/dynamo/blob/main/docs/design_docs/planner_design.md#step-2-correction-factor-calculation) to adjust dynamically at runtime. > ⚠️ **Warning:** The TTFT values here represent the ideal case when requests arrive uniformly, minimizing queueing. Real-world TTFT may be higher than profiling results. To mitigate the issue, planner uses [correction factors](https://github.com/ai-dynamo/dynamo/blob/main/docs/pages/design-docs/planner-design.md#step-2-correction-factor-calculation) to adjust dynamically at runtime.
> 💡 **Tip:** Use the GPU cost checkbox and input in the charts section to convert GPU hours to cost. > 💡 **Tip:** Use the GPU cost checkbox and input in the charts section to convert GPU hours to cost.
""" """
......
...@@ -127,7 +127,7 @@ To see all available router arguments, run: ...@@ -127,7 +127,7 @@ To see all available router arguments, run:
python -m dynamo.frontend --help python -m dynamo.frontend --help
``` ```
For detailed explanations of router arguments (especially KV cache routing parameters), see the [Router Guide](../../docs/components/router/router_guide.md). For detailed explanations of router arguments (especially KV cache routing parameters), see the [Router Guide](../../docs/pages/components/router/router-guide.md).
> [!Note] > [!Note]
> If you're unsure whether your backend engines correctly emit KV events for certain models (e.g., hybrid models like gpt-oss or nemotron nano 2), use the `--no-kv-events` flag to disable KV event tracking and use approximate KV indexing instead: > If you're unsure whether your backend engines correctly emit KV events for certain models (e.g., hybrid models like gpt-oss or nemotron nano 2), use the `--no-kv-events` flag to disable KV event tracking and use approximate KV indexing instead:
...@@ -146,7 +146,7 @@ When you launch prefill workers using `run_engines.sh --prefill`, the frontend a ...@@ -146,7 +146,7 @@ When you launch prefill workers using `run_engines.sh --prefill`, the frontend a
- Uses the same routing mode as the frontend's `--router-mode` setting - Uses the same routing mode as the frontend's `--router-mode` setting
- Seamlessly integrates with your decode workers for token generation - Seamlessly integrates with your decode workers for token generation
No additional configuration is needed - simply launch both decode and prefill workers, and the system handles the rest. See the [Router Guide](../../docs/components/router/router_guide.md#disaggregated-serving) for more details. No additional configuration is needed - simply launch both decode and prefill workers, and the system handles the rest. See the [Router Guide](../../docs/pages/components/router/router-guide.md#disaggregated-serving) for more details.
> [!Note] > [!Note]
> The unified frontend with automatic prefill routing is currently enabled for vLLM and TensorRT-LLM backends. For SGLang (work in progress), you need to launch a separate standalone router as the prefill router targeting the prefill endpoints. See example script: [`examples/backends/sglang/launch/disagg_router.sh`](../../examples/backends/sglang/launch/disagg_router.sh) > The unified frontend with automatic prefill routing is currently enabled for vLLM and TensorRT-LLM backends. For SGLang (work in progress), you need to launch a separate standalone router as the prefill router targeting the prefill endpoints. See example script: [`examples/backends/sglang/launch/disagg_router.sh`](../../examples/backends/sglang/launch/disagg_router.sh)
......
...@@ -25,9 +25,9 @@ This directory contains the core components that make up the Dynamo inference fr ...@@ -25,9 +25,9 @@ This directory contains the core components that make up the Dynamo inference fr
Dynamo supports multiple inference engines, each with their own deployment configurations and capabilities: Dynamo supports multiple inference engines, each with their own deployment configurations and capabilities:
- **[vLLM](/docs/backends/vllm/README.md)** - Full-featured vLLM integration with disaggregated serving, KV-aware routing, SLA-based planning, native KV cache events, and NIXL-based transfer mechanisms - **[vLLM](/docs/pages/backends/vllm/README.md)** - Full-featured vLLM integration with disaggregated serving, KV-aware routing, SLA-based planning, native KV cache events, and NIXL-based transfer mechanisms
- **[SGLang](/docs/backends/sglang/README.md)** - SGLang engine integration with ZMQ-based communication, supporting disaggregated serving and KV-aware routing - **[SGLang](/docs/pages/backends/sglang/README.md)** - SGLang engine integration with ZMQ-based communication, supporting disaggregated serving and KV-aware routing
- **[TensorRT-LLM](/docs/backends/trtllm/README.md)** - TensorRT-LLM integration with disaggregated serving capabilities and TensorRT acceleration - **[TensorRT-LLM](/docs/pages/backends/trtllm/README.md)** - TensorRT-LLM integration with disaggregated serving capabilities and TensorRT acceleration
Each engine provides launch and deploy scripts for different deployment patterns in the [examples](../examples/backends/) folder. Each engine provides launch and deploy scripts for different deployment patterns in the [examples](../examples/backends/) folder.
......
...@@ -5,4 +5,4 @@ ...@@ -5,4 +5,4 @@
The API gateway for serving LLM inference requests with OpenAI-compatible HTTP and KServe gRPC endpoints. The API gateway for serving LLM inference requests with OpenAI-compatible HTTP and KServe gRPC endpoints.
See [docs/components/frontend/](../../../../docs/components/frontend/) for documentation. See [docs/pages/components/frontend/](../../../../docs/pages/components/frontend/) for documentation.
...@@ -64,7 +64,7 @@ python -m dynamo.mocker \ ...@@ -64,7 +64,7 @@ python -m dynamo.mocker \
The profile results directory should contain `selected_prefill_interpolation/` and `selected_decode_interpolation/` subdirectories with `raw_data.npz` files. This works seamlessly in Kubernetes where profile data is mounted via ConfigMap or PersistentVolume. The profile results directory should contain `selected_prefill_interpolation/` and `selected_decode_interpolation/` subdirectories with `raw_data.npz` files. This works seamlessly in Kubernetes where profile data is mounted via ConfigMap or PersistentVolume.
To generate profiling data for your own model/hardware configuration, run the profiler (see [SLA-driven profiling documentation](../../../../docs/components/profiler/profiler_guide.md) for details): To generate profiling data for your own model/hardware configuration, run the profiler (see [SLA-driven profiling documentation](../../../../docs/pages/components/profiler/profiler-guide.md) for details):
```bash ```bash
python benchmarks/profiler/profile_sla.py \ python benchmarks/profiler/profile_sla.py \
......
...@@ -19,5 +19,5 @@ limitations under the License. ...@@ -19,5 +19,5 @@ limitations under the License.
SLA-driven autoscaling controller for Dynamo inference graphs. SLA-driven autoscaling controller for Dynamo inference graphs.
- **User docs**: [docs/planner/](/docs/components/planner/) (deployment, configuration, examples) - **User docs**: [docs/pages/components/planner/](/docs/pages/components/planner/) (deployment, configuration, examples)
- **Design docs**: [docs/design_docs/planner_design.md](/docs/design_docs/planner_design.md) (architecture, algorithms) - **Design docs**: [docs/pages/design-docs/planner-design.md](/docs/pages/design-docs/planner-design.md) (architecture, algorithms)
...@@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) ...@@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)
MISSING_PROFILING_DATA_ERROR_MESSAGE = ( MISSING_PROFILING_DATA_ERROR_MESSAGE = (
"SLA-Planner requires pre-deployment profiling results to run.\n" "SLA-Planner requires pre-deployment profiling results to run.\n"
"Please follow /docs/components/profiler/profiler_guide.md to run the profiling first,\n" "Please follow /docs/pages/components/profiler/profiler-guide.md to run the profiling first,\n"
"and make sure the profiling results are present in --profile-results-dir." "and make sure the profiling results are present in --profile-results-dir."
) )
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# Standalone Router # Standalone Router
A backend-agnostic standalone KV-aware router service for Dynamo deployments. For details on how KV-aware routing works, see the [Router Guide](/docs/components/router/router_guide.md). A backend-agnostic standalone KV-aware router service for Dynamo deployments. For details on how KV-aware routing works, see the [Router Guide](/docs/pages/components/router/router-guide.md).
## Overview ## Overview
...@@ -29,7 +29,7 @@ python -m dynamo.router \ ...@@ -29,7 +29,7 @@ python -m dynamo.router \
- `--endpoint`: Full endpoint path for workers in the format `namespace.component.endpoint` (e.g., `dynamo.prefill.generate`) - `--endpoint`: Full endpoint path for workers in the format `namespace.component.endpoint` (e.g., `dynamo.prefill.generate`)
**Router Configuration:** **Router Configuration:**
For detailed descriptions of all KV router configuration options including `--block-size`, `--kv-overlap-score-weight`, `--router-temperature`, `--no-kv-events`, `--router-replica-sync`, `--router-snapshot-threshold`, `--router-reset-states`, and `--no-track-active-blocks`, see the [Router Guide](/docs/components/router/router_guide.md). For detailed descriptions of all KV router configuration options including `--block-size`, `--kv-overlap-score-weight`, `--router-temperature`, `--no-kv-events`, `--router-replica-sync`, `--router-snapshot-threshold`, `--router-reset-states`, and `--no-track-active-blocks`, see the [Router Guide](/docs/pages/components/router/router-guide.md).
## Architecture ## Architecture
...@@ -43,7 +43,7 @@ Clients query the `find_best_worker` endpoint to determine which worker should p ...@@ -43,7 +43,7 @@ Clients query the `find_best_worker` endpoint to determine which worker should p
## Example: Manual Disaggregated Serving (Alternative Setup) ## Example: Manual Disaggregated Serving (Alternative Setup)
> [!Note] > [!Note]
> **This is an alternative advanced setup.** The recommended approach for disaggregated serving is to use the frontend's automatic prefill routing, which activates when you register workers with `ModelType.Prefill`. See the [Router Guide](/docs/components/router/router_guide.md#disaggregated-serving) for the default setup. > **This is an alternative advanced setup.** The recommended approach for disaggregated serving is to use the frontend's automatic prefill routing, which activates when you register workers with `ModelType.Prefill`. See the [Router Guide](/docs/pages/components/router/router-guide.md#disaggregated-serving) for the default setup.
> >
> Use this manual setup if you need explicit control over prefill routing configuration or want to manage prefill and decode routers separately. > Use this manual setup if you need explicit control over prefill routing configuration or want to manage prefill and decode routers separately.
...@@ -103,7 +103,7 @@ See [`components/src/dynamo/vllm/handlers.py`](../vllm/handlers.py) for a refere ...@@ -103,7 +103,7 @@ See [`components/src/dynamo/vllm/handlers.py`](../vllm/handlers.py) for a refere
## See Also ## See Also
- [Router Guide](/docs/components/router/router_guide.md) - Configuration and tuning for KV-aware routing - [Router Guide](/docs/pages/components/router/router-guide.md) - Configuration and tuning for KV-aware routing
- [Router Design](/docs/design_docs/router_design.md) - Architecture details and event transport modes - [Router Design](/docs/pages/design-docs/router-design.md) - Architecture details and event transport modes
- [Frontend Router](../frontend/README.md) - Main HTTP frontend with integrated routing - [Frontend Router](../frontend/README.md) - Main HTTP frontend with integrated routing
- [Router Benchmarking](/benchmarks/router/README.md) - Performance testing and tuning - [Router Benchmarking](/benchmarks/router/README.md) - Performance testing and tuning
...@@ -13,7 +13,7 @@ Requirements: ...@@ -13,7 +13,7 @@ Requirements:
- visual_gen: Part of TensorRT-LLM, located at tensorrt_llm/visual_gen/. - visual_gen: Part of TensorRT-LLM, located at tensorrt_llm/visual_gen/.
Currently on the feat/visual_gen branch (not yet merged to main). Currently on the feat/visual_gen branch (not yet merged to main).
See: https://github.com/NVIDIA/TensorRT-LLM/tree/feat/visual_gen/tensorrt_llm/visual_gen See: https://github.com/NVIDIA/TensorRT-LLM/tree/feat/visual_gen/tensorrt_llm/visual_gen
- See docs/backends/trtllm/README.md for setup instructions. - See docs/pages/backends/trtllm/README.md for setup instructions.
Note on imports: Note on imports:
visual_gen is imported lazily in initialize() because: visual_gen is imported lazily in initialize() because:
......
...@@ -85,7 +85,7 @@ async def init_video_diffusion_worker( ...@@ -85,7 +85,7 @@ async def init_video_diffusion_worker(
raise RuntimeError( raise RuntimeError(
"ModelType.Videos not available in dynamo-runtime. " "ModelType.Videos not available in dynamo-runtime. "
"Video diffusion requires a compatible dynamo-runtime version. " "Video diffusion requires a compatible dynamo-runtime version. "
"See docs/backends/trtllm/README.md for setup instructions." "See docs/pages/backends/trtllm/README.md for setup instructions."
) )
model_type = ModelType.Videos model_type = ModelType.Videos
......
../docs/kubernetes/README.md
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment