"tools/train_utils/vscode:/vscode.git/clone" did not exist on "4dc18496f09154eb4d4babd9f2dd90f35bffc6bf"
Unverified Commit 403344e5 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

refactor: refactor dynamo deploy subfolder (#927)

parent 99cd9d85
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Enable the strictest available strict-mode checks for this script.
set-strictmode -version latest
# Dot-source the shared Helm test harness, located relative to the Git repository root,
# so that its functions are defined in this script's scope.
. "$(& git rev-parse --show-toplevel)/deploy/Kubernetes/_build/helm-test.ps1"
# Table of Helm chart test cases handed to initialize_test further down.
# Each entry is a hashtable with the keys:
#   name     - identifier of the test case.
#   expected - numeric expectation for the run (presumably the expected helm exit code; confirm in helm-test.ps1).
#   matches  - text to look for in the output: either a plain string or a hashtable holding
#              an 'indent' value and a list of consecutive 'lines'.
#              NOTE(review): how 'indent' is applied and whether strings are treated as regular
#              expressions is decided by helm-test.ps1 - confirm there.
#   options  - extra options for the run (empty for every case here).
#   values   - values file names for the run (presumably layered in the listed order; confirm).
$tests = @(
# Case: chart rendered with only the basic values file; expected = 0.
@{
name = 'basic'
expected = 0
matches = @(
# Labels block.
@{
indent = 6
lines = @(
'labels:'
' app: test'
' app.kubernetes.io/component: test_dynamo_chart'
' app.kubernetes.io/instance: test'
' app.kubernetes.io/name: test_dynamo_chart'
' app.kubernetes.io/part-of: dynamo'
' app.kubernetes.io/version: "1.0.0"'
' app.kubernetes.io/managed-by: Helm'
' helm.sh/chart: "dynamo_component"'
' helm.sh/version: "1.0.0"'
)
}
# Container port entries: health, request, api, metrics.
@{
indent = 8
lines = @(
'- containerPort: 8000'
' name: health'
)
}
@{
indent = 8
lines = @(
'- containerPort: 9345'
' name: request'
)
}
@{
indent = 8
lines = @(
'- containerPort: 443'
' name: api'
)
}
@{
indent = 8
lines = @(
'- containerPort: 9347'
' name: metrics'
)
}
# Resource limits and requests with a single GPU.
@{
indent = 10
lines = @(
'limits:'
' cpu: 4'
' ephemeral-storage: 1Gi'
' nvidia.com/gpu: 1'
' memory: 16Gi'
)
}
@{
indent = 10
lines = @(
'requests:'
' cpu: 4'
' ephemeral-storage: 1Gi'
' nvidia.com/gpu: 1'
' memory: 16Gi'
)
}
)
options = @()
values = @(
'basic.yaml'
)
}
# Case: basic values plus resource_gpu.yaml; expected = 0.
@{
name = "resource_gpu"
expected = 0
matches = @(
# GPU-related key/operator match expressions.
@{
indent = 14
lines = @(
'- key: nvidia.com/gpu'
' operator: Exists'
)
}
@{
indent = 14
lines = @(
'- key: nvidia.com/gpu.product'
' operator: In'
' values:'
' - a10g'
)
}
# Resource limits and requests with two GPUs.
@{
indent = 10
lines = @(
'limits:'
' cpu: 4'
' ephemeral-storage: 1Gi'
' nvidia.com/gpu: 2'
' memory: 16Gi'
)
}
@{
indent = 10
lines = @(
'requests:'
' cpu: 4'
' ephemeral-storage: 1Gi'
' nvidia.com/gpu: 2'
' memory: 16Gi'
)
}
)
options = @()
values = @(
'basic.yaml'
'resource_gpu.yaml'
)
}
# Case: basic values plus invalid_values.yaml; expected = 1 with schema validation messages.
@{
name = 'invalid_values'
expected = 1
matches = @(
# Plain-string match; note the doubled single quote and the backslash-escaped parentheses.
'Error: values don''t meet the specifications of the schema\(s\) in the following chart\(s\):'
# One pair of validation messages per offending value.
@{
indent = 0
lines = @(
'- kubernetes.checks.liveness.successThreshold: Must validate one and only one schema (oneOf)'
'- kubernetes.checks.liveness.successThreshold: Must be greater than or equal to 1'
)
}
@{
indent = 0
lines = @(
'- kubernetes.checks.liveness.failureThreshold: Must validate one and only one schema (oneOf)'
'- kubernetes.checks.liveness.failureThreshold: Must be greater than or equal to 1'
)
}
@{
indent = 0
lines = @(
'- kubernetes.checks.liveness.initialDelaySeconds: Must validate one and only one schema (oneOf)'
'- kubernetes.checks.liveness.initialDelaySeconds: Invalid type. Expected: integer, given: number'
)
}
@{
indent = 0
lines = @(
'- kubernetes.checks.liveness.periodSeconds: Must validate one and only one schema (oneOf)'
'- kubernetes.checks.liveness.periodSeconds: Invalid type. Expected: integer, given: string'
)
}
@{
indent = 0
lines = @(
'- ports.health: Must validate one and only one schema (oneOf)'
'- ports.health: Must be less than or equal to 65535'
)
}
@{
indent = 0
lines = @(
'- ports.metrics: Must validate one and only one schema (oneOf)'
'- ports.metrics: Invalid type. Expected: integer, given: string'
)
}
@{
indent = 0
lines = @(
'- ports.request: Must validate one and only one schema (oneOf)'
'- ports.request: Must be greater than or equal to 1025'
)
}
@{
indent = 0
lines = @(
'- resources.cpu: Must validate one and only one schema (oneOf)'
'- resources.cpu: Must be greater than or equal to 1'
)
}
)
options = @()
values = @(
'basic.yaml'
'invalid_values.yaml'
)
}
)
# Build the run configuration from the script arguments and the $tests table.
# NOTE(review): initialize_test, test_helm_chart, get_is_debug, fatal_exit and cleanup_after
# are not defined in this script; presumably they come from the dot-sourced helm-test.ps1.
$config = initialize_test $args $tests
# Begin with the state of not having passed.
$is_pass = $false
try {
$is_pass = $(test_helm_chart $config)
}
catch {
# In debug mode rethrow the original error; otherwise hand its text to fatal_exit.
if (get_is_debug) {
throw $_
}
fatal_exit "$_"
}
# Clean up any NVBUILD environment variables left behind by the build.
cleanup_after
# Report the overall result through the process exit code.
if (-not $is_pass) {
exit -1
}
exit 0
# deploy Dynamo pipeline on Kubernetes
This is a proof of concept for a Helm chart to deploy services defined in a bento.yaml configuration.
## Usage
### Prerequisites
- make sure dynamo cli is installed
- make sure you have a Docker image registry that you can push to and that your Kubernetes cluster can pull from
- set the imagePullSecrets in the values.yaml file
- navigate to the pipeline deployment directory by running:
```bash
cd deploy/Kubernetes/pipeline
```
- build and push the DYNAMO_IMAGE as described in the [main README](../../README.md#building-the-dynamo_image-base-image) to an image registry
- make sure the `nats` and `etcd` dependencies are installed (under the `dependencies` subdirectory). For more details, see [Installing Required Dependencies](../../../docs/guides/dynamo_deploy.md#installing-required-dependencies)
### Setting up Image Pull Secrets
Before deploying, you need to ensure your Kubernetes namespace has the appropriate image pull secret configured. The Helm chart uses `docker-imagepullsecret` by default.
You can create this secret in your namespace using:
```bash
kubectl create secret docker-registry docker-imagepullsecret \
--docker-server=<registry-server> \
--docker-username=<username> \
--docker-password=<password> \
-n <namespace>
```
Alternatively, you can modify the `imagePullSecrets` section in `deploy/Kubernetes/pipeline/chart/values.yaml` to match your registry credentials.
### Install the Helm chart
```bash
export DYNAMO_IMAGE=<dynamo_docker_image_name>
./deploy.sh <docker_registry> <k8s_namespace> <path_to_dynamo_directory> <dynamo_identifier> [<dynamo_config_file>]
# example: export DYNAMO_IMAGE=nvcr.io/nvidian/nim-llm-dev/dynamo-base-worker:0.0.1
# example: ./deploy.sh nvcr.io/nvidian/nim-llm-dev my-namespace ../../../examples/hello_world/ hello_world:Frontend
# example: ./deploy.sh nvcr.io/nvidian/nim-llm-dev my-namespace ../../../examples/llm graphs.disagg_router:Frontend ../../../examples/llm/configs/disagg_router.yaml
```
### Test the deployment
```bash
# Forward the service port to localhost
kubectl -n <k8s_namespace> port-forward svc/hello-world-frontend 3000:80
# In another terminal window, test the API endpoint
curl -X 'POST' 'http://localhost:3000/generate' \
-H 'accept: text/event-stream' \
-H 'Content-Type: application/json' \
-d '{"text": "test"}'
```
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import subprocess
import sys
import pytest
def find_repository_root():
    """Locate the top-level directory of the enclosing Git repository.

    The current test is skipped (via ``pytest.skip``) when no ``git``
    executable can be found on the PATH.

    Returns:
        str: Output of ``git rev-parse --show-toplevel`` decoded as UTF-8,
            with leading and trailing whitespace removed.
    """
    git_is_missing = shutil.which("git") is None
    if git_is_missing:
        pytest.skip('Required executable "git" not found.')

    # Ask Git itself for the root instead of walking parent directories.
    raw_output = subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
    return raw_output.decode("utf-8").strip()
@pytest.mark.parametrize("component", ["common"])
def test_helm_chart(component):
    """Run the PowerShell Helm chart test harness for one component.

    The test is skipped when the ``pwsh`` executable is not on the PATH.
    Otherwise it invokes ``deploy/Kubernetes/<component>/tests/run.ps1``
    (resolved from the repository root) and passes only when that process
    exits with code 0.

    Args:
        component str: Folder under Kubernetes/ to find tests.
    """
    if shutil.which("pwsh") is None:
        pytest.skip('Required executable "pwsh" not found.')

    script_location = ("deploy", "Kubernetes", component, "tests", "run.ps1")
    test_chart_path = os.path.join(find_repository_root(), *script_location)

    # Blank line first so the message stands apart from pytest's own output.
    print()
    print(f"Executing {test_chart_path}")

    completed = subprocess.run(
        ["pwsh", "-c", test_chart_path, "test", "-v:detailed"]
    )
    assert completed.returncode == 0
if __name__ == "__main__":
    # This module only defines pytest tests; running it as a script is a usage
    # error, so explain how to invoke it and exit with a failure status.
    print(
        "Error: This script is not intended to be executed directly. "
        # Derive the file name at runtime so the hint always matches this file.
        f"Instead use `pytest {os.path.basename(__file__)}` to execute it.",
        file=sys.stderr,
        flush=True,
    )
    # sys.exit is always available, whereas the bare exit() helper is only
    # installed by the site module (absent under `python -S`).
    sys.exit(1)
<!--
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Dynamo Deployment Guide
This directory contains all the necessary files and instructions for deploying Dynamo in various environments. Choose the deployment method that best suits your needs:
## Directory Structure
```
deploy/
├── cloud/ # Cloud deployment configurations and tools
├── helm/ # Helm charts for manual Kubernetes deployment
├── metrics/ # Monitoring and metrics configuration
├── sdk/ # Dynamo SDK and related tools
└── README.md # This file
```
## Deployment Options
### 1. 🚀 Dynamo Cloud Platform [PREFERRED]
The Dynamo Cloud Platform provides a managed deployment experience with:
- Automated infrastructure management
- Built-in monitoring and metrics
- Simplified deployment process via `dynamo deploy` CLI commands
- Production-ready configurations
- Managed NATS and etcd dependencies
For detailed instructions, see:
- [Dynamo Cloud Platform Guide](../docs/guides/dynamo_deploy/dynamo_cloud.md)
- [Operator Deployment Guide](../docs/guides/dynamo_deploy/operator_deployment.md)
### 2. Manual Deployment with Helm Charts
For users who need more control over their deployments:
- Full control over deployment parameters
- Manual management of infrastructure
- Customizable monitoring setup
- Flexible configuration options
- Manual management of NATS and etcd dependencies
Documentation:
- [Manual Helm Deployment Guide](../docs/guides/dynamo_deploy/manual_helm_deployment.md)
- [Minikube Setup Guide](../docs/guides/dynamo_deploy/minikube.md)
## Choosing the Right Deployment Method
- **Dynamo Cloud Platform**: Best for most users, provides managed deployment with built-in monitoring
- See [Dynamo Cloud Platform Guide](../docs/guides/dynamo_deploy/dynamo_cloud.md)
- Recommended for production deployments
- Simplifies dependency management
- Provides infrastructure for user management
- **Manual Helm Deployment**: For users who need full control over their deployment
- See [Manual Helm Deployment Guide](../docs/guides/dynamo_deploy/manual_helm_deployment.md)
- Suitable for custom deployments
- Requires manual management of dependencies
- Provides maximum flexibility for users
## Example Deployments
To help you get started, we provide several example deployments:
### Hello World Example
A basic example to learn Dynamo deployment: [Hello World Example](../examples/hello_world/README.md#deploying-to-and-running-the-example-in-kubernetes)
- Shows how to deploy a simple three-service pipeline that processes text
- Provides step-by-step instructions for building your service and testing with port forwarding
- Includes sample output showing the text flow between services
### LLM Examples
Example for deploying LLM services: [LLM Example](../examples/llm/README.md#deploy-to-kubernetes)
- Demonstrates deploying and making inference requests against LLM models
- Includes examples for both aggregated and disaggregated serving
- Provides detailed deployment steps and testing instructions
......@@ -15,29 +15,12 @@ See the License for the specific language governing permissions and
limitations under the License.
-->
# Deploying Dynamo inference graphs to Kubernetes
# Dynamo Cloud Platform
## Deployment Paths in Dynamo
This directory contains the infrastructure components required for the Dynamo cloud platform, which is used when deploying with the `dynamo deploy` CLI commands.
Dynamo provides two distinct deployment paths, each serving different purposes:
For detailed documentation on setting up and using the Dynamo Cloud Platform, please refer to:
- [Dynamo Cloud Platform Guide](../docs/guides/dynamo_deploy/dynamo_cloud.md)
- [Operator Deployment Guide](../docs/guides/dynamo_deploy/operator_deployment.md)
1. **Dynamo Cloud Platform** (`deploy/dynamo/helm/`)
- Contains the infrastructure components required for the Dynamo cloud platform
- Used when deploying with the `dynamo deploy` CLI commands
- Provides a managed deployment experience
- This README focuses on setting up this platform infrastructure
- For Dynamo cloud installation instructions, see [Installing Dynamo Cloud](./helm/README.md), which walks through installing and configuring the Dynamo cloud components on your Kubernetes cluster.
2. **Manual Deployment with Helm Charts** (`deploy/Kubernetes/`)
- Used for manually deploying inference graphs to Kubernetes
- Contains Helm charts and configurations for deploying individual inference pipelines
- Documentation:
- [Deploying Dynamo Inference Graphs to Kubernetes using Helm](../Kubernetes/pipeline/README.md)
- [Dynamo Deploy Guide](../../docs/guides/dynamo_deploy.md)
Choose the appropriate deployment path based on your needs:
- Use `deploy/Kubernetes/` if you want to manually manage your inference graph deployments
- Use `deploy/dynamo/helm/` if you want to use the Dynamo cloud platform and CLI tools
## Hello World example
See [examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli](../../examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli)
\ No newline at end of file
For a quick start example, see [examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli](../../examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment